diff --git a/.github/workflows/sync-to-hf.yml b/.github/workflows/sync-to-hf.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d886dc8ac5597eb11814533125ee95a93af7b3eb
--- /dev/null
+++ b/.github/workflows/sync-to-hf.yml
@@ -0,0 +1,55 @@
+name: Sync to HuggingFace Dataset
+
+on:
+ push:
+ branches: [main]
+ paths:
+ - 'data/**/*.json'
+ workflow_dispatch: # Allow manual trigger
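+
+# Note: the upload steps below assume an HF_TOKEN repository secret with
+# write access to the target HuggingFace dataset.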
+
+jobs:
+ sync-to-huggingface:
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 2
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.11'
+
+ - name: Install dependencies
+ run: |
+ pip install datasets huggingface_hub pandas pyarrow
+
+ - name: Convert Changed JSONs to Parquet (Optimized)
+ env:
+ HF_DATASET_REPO: deepmage121/eee_test
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ run: |
+ echo "Detecting changed leaderboards..."
+ python scripts/convert_to_parquet.py
+
+ - name: Upload Changed Parquets to HuggingFace
+ env:
+ HF_DATASET_REPO: deepmage121/eee_test
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
+ run: |
+ echo "Uploading changed parquets..."
+ python scripts/upload_to_hf.py
+
+ - name: Report status
+ if: success()
+ run: |
+ echo "Successfully synced to HuggingFace dataset"
+ echo "View at: https://huggingface.co/datasets/deepmage121/eee_test"
+ if [ -f parquet_output/changed_leaderboards.json ]; then
+ echo ""
+ echo "Changes processed:"
+ cat parquet_output/changed_leaderboards.json
+ fi
+
diff --git a/.gitignore b/.gitignore
index e43b0f988953ae3a84b00331d0ccf5f7d51cb3cf..f66c0318c86fa7c5971c863094b4f0a45f2c4d01 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,9 @@
.DS_Store
+.secrets
+.actrc
+__pycache__/
+*.pyc
+parquet_output/
+*.venv*
+*.md
+*.ipynb_checkpoints
diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000000000000000000000000000000000000..2c0733315e415bfb5e5b353f9996ecd964d395b2
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.11
diff --git a/app.py b/app.py
index 8c64d41b1c2e8f7e742563979659bc7e7bc69215..4132024d38460461651d2af4f7f8bf3035e413bf 100644
--- a/app.py
+++ b/app.py
@@ -1,479 +1,106 @@
+"""
+Evaluation Leaderboard - Gradio Interface
+Displays model evaluation results from HuggingFace datasets.
+"""
import gradio as gr
import pandas as pd
-import json
from pathlib import Path
-DATA_DIR = Path("leaderboard_data")
-LEADERBOARD_CACHE = {}
-
-def parse_eval_json(file_path):
- """Parses a single JSON file to extract model, provider, and results."""
- try:
- with open(file_path, 'r') as f:
- data = json.load(f)
-
- leaderboard_name = data.get("evaluation_source", {}).get("evaluation_source_name", "Unknown Leaderboard")
- provider_name = data.get("source_metadata", {}).get("source_organization_name", "Unknown Provider")
- model_id = data.get("model_info", {}).get("id", "Unknown Model")
- developer_name = data.get("model_info", {}).get("developer", "Unknown Developer")
-
- params = data.get("model_info", {}).get("params_billions", None)
- architecture = data.get("model_info", {}).get("architecture", "Unknown")
- precision = data.get("additional_details", {}).get("precision", "Unknown")
- if precision == "Unknown":
- precision = data.get("model_info", {}).get("precision", "Unknown")
-
- results = {}
- if "evaluation_results" in data:
- for res in data["evaluation_results"]:
- eval_name = res.get("evaluation_name", "Unknown Metric")
- score = res.get("score_details", {}).get("score", None)
- if score is not None:
- results[eval_name] = score
-
- return {
- "leaderboard": leaderboard_name,
- "provider": provider_name,
- "model": model_id,
- "developer": developer_name,
- "params": params,
- "architecture": architecture,
- "precision": precision,
- "results": results,
- "raw_data": data
- }
- except Exception as e:
- print(f"Error parsing {file_path}: {e}")
- return None
-
-def get_available_leaderboards():
- """Scans data directory for leaderboard folders."""
- if not DATA_DIR.exists():
- return []
- return [d.name for d in DATA_DIR.iterdir() if d.is_dir()]
-
-def normalize_leaderboard_name(name):
- """Normalizes leaderboard name to remove spaces."""
- return name.replace(" ", "")
-
-def sanitize_filename_component(name):
- """Sanitizes a name to be safe for use in directory names."""
- return name.replace("/", "_").replace("\\", "_").replace(":", "_").strip()
-
-def walk_eval_files(leaderboard_name):
- """Generator that walks through Leaderboard directory recursively."""
- lb_path = DATA_DIR / leaderboard_name
- if not lb_path.exists():
- return
-
- yield from lb_path.rglob("*.json")
+# Import custom modules
+from data_loader import (
+ load_hf_dataset_on_startup,
+ get_available_leaderboards,
+ get_eval_metadata,
+ build_leaderboard_table,
+ clear_cache,
+ DATA_DIR
+)
+from ui_components import get_theme, get_custom_css, format_leaderboard_header, format_metric_details
-def get_eval_metadata(selected_leaderboard):
- """Extracts evaluation metadata from the leaderboard data."""
- if not selected_leaderboard:
- return {}
-
- eval_metadata = {"evals": {}, "source_info": {}}
-
- for json_file in walk_eval_files(selected_leaderboard):
- parsed = parse_eval_json(json_file)
- if parsed:
- if not eval_metadata["source_info"]:
- source_meta = parsed["raw_data"].get("source_metadata", {})
- source_data_list = parsed["raw_data"].get("source_data", [])
- url = source_data_list[0] if isinstance(source_data_list, list) and source_data_list else "#"
-
- eval_metadata["source_info"] = {
- "organization": source_meta.get("source_organization_name", "Unknown"),
- "relationship": source_meta.get("evaluator_relationship", "Unknown"),
- "url": url
- }
-
- if "evaluation_results" in parsed["raw_data"]:
- for res in parsed["raw_data"]["evaluation_results"]:
- eval_name = res.get("evaluation_name", "Unknown Metric")
- if eval_name not in eval_metadata["evals"]:
- metric_config = res.get("metric_config", {})
- eval_metadata["evals"][eval_name] = {
- "description": metric_config.get("evaluation_description", "No description available"),
- "score_type": metric_config.get("score_type", "unknown"),
- "lower_is_better": metric_config.get("lower_is_better", False),
- "min_score": metric_config.get("min_score"),
- "max_score": metric_config.get("max_score"),
- "level_names": metric_config.get("level_names", []),
- "level_metadata": metric_config.get("level_metadata", []),
- "has_unknown_level": metric_config.get("has_unknown_level", False)
- }
- break
-
- return eval_metadata
-def format_eval_info_html(selected_leaderboard):
- """Formats evaluation metadata into a responsive HTML grid."""
+def export_leaderboard_to_json(selected_leaderboard):
+ """Export current leaderboard to JSON files in a zip using parquet_to_folder."""
if not selected_leaderboard:
- return """
-
-
đ Welcome to Eval Leaderboard
-
Select a leaderboard above to visualize results and metadata.
-
- """
-
- metadata = get_eval_metadata(selected_leaderboard)
- if not metadata or not metadata.get("evals"):
- return f"""No metadata found for {selected_leaderboard}
"""
-
- source_info = metadata.get("source_info", {})
- evals = metadata.get("evals", {})
- unique_evals_count = len(evals)
-
- eval_badges = "".join([
- f'<span>{name}</span>'
- for name in sorted(evals.keys())
- ])
-
- source_url = source_info.get('url', '#')
- source_link = f'<a href="{source_url}">🔗 {source_info.get("organization", "Unknown")}</a>'
-
- html = f"""
-
-
đ {selected_leaderboard}
-
-
-
Source Organization
-
{source_link}
-
-
-
Evaluator Relationship
-
{source_info.get('relationship', 'Unknown').replace('_', ' ').title()}
-
-
-
Included Evaluations
-
{eval_badges}
-
-
-
-
- Metric Details
- """
+ return None
- html += """
-
- """
+ import tempfile
+ import shutil
+ import zipfile
+ from json_to_parquet import parquet_to_folder
- for eval_name, info in evals.items():
- score_type = info['score_type'].upper() if info['score_type'] else "UNKNOWN"
- direction = "Lower is better" if info['lower_is_better'] else "Higher is better"
- direction_icon = "↓" if info['lower_is_better'] else "↑"
+ try:
+ # Find the parquet file in DATA_DIR
+ parquet_path = DATA_DIR / selected_leaderboard / f"{selected_leaderboard}.parquet"
- details_content = ""
- if info['score_type'] == "continuous" and info.get('min_score') is not None:
- details_content += f"
Range: [{info['min_score']} - {info['max_score']}]
"
- elif info['score_type'] == "levels" and info.get('level_names'):
- levels = ", ".join(info['level_names'])
- details_content += f"
Levels: {levels}
"
+ if not parquet_path.exists():
+ print(f"Parquet file not found: {parquet_path}")
+ return None
- if info.get('has_unknown_level'):
- details_content += "<p>* -1 indicates Unknown</p>"
-
- html += f"""
-
-
-
- đˇī¸
- {eval_name}
-
-
- {direction_icon} {direction}
-
-
+ # Create temp directory for export
+ with tempfile.TemporaryDirectory() as temp_dir:
+ temp_path = Path(temp_dir)
+ output_dir = temp_path / "json_export"
+ output_dir.mkdir()
- <div>
- {info['description']}
- </div>
- <div>
- {details_content}
- </div>
- <div>{score_type}</div>
- </div>
- """
-
- html += "
"
- return html
+ # Use the round-trip functionality from json_to_parquet
+ parquet_to_folder(str(parquet_path), str(output_dir))
+
+ # Create zip file
+ zip_path = temp_path / f"{selected_leaderboard}_export.zip"
+ with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
+ for json_file in output_dir.rglob("*.json"):
+ arcname = json_file.relative_to(output_dir)
+ zipf.write(json_file, arcname)
+
+ # Copy to a permanent location for download
+ final_zip = Path(tempfile.gettempdir()) / f"{selected_leaderboard}_export.zip"
+ shutil.copy(zip_path, final_zip)
+
+ return str(final_zip)
+ except Exception as e:
+ print(f"Export error: {e}")
+ return None
+
-def update_leaderboard_table(selected_leaderboard, search_query="", group_by_model=False, progress=gr.Progress()):
+def update_leaderboard_table(selected_leaderboard, search_query="", progress=gr.Progress()):
"""Loads and aggregates data for the selected leaderboard."""
if not selected_leaderboard:
- return pd.DataFrame(), format_eval_info_html(None)
+ return pd.DataFrame(), "", format_leaderboard_header(None, {}), format_metric_details(None, {})
- # Check cache
- full_df = None
- if selected_leaderboard in LEADERBOARD_CACHE:
- # Cache stores (df, meta_html)
- full_df, meta_html = LEADERBOARD_CACHE[selected_leaderboard]
- else:
- progress(0, desc=f"Scanning {selected_leaderboard}...")
- all_files = list(walk_eval_files(selected_leaderboard))
- total_files = len(all_files)
-
- rows = []
- for i, json_file in enumerate(all_files):
- if i % 100 == 0:
- progress((i / total_files), desc=f"Loading {selected_leaderboard}...")
- parsed = parse_eval_json(json_file)
- if parsed:
- row = {
- "Model": parsed["model"],
- "Developer": parsed["developer"],
- "Params (B)": parsed["params"],
- "Arch": parsed["architecture"],
- "Precision": parsed["precision"]
- }
- row.update(parsed["results"])
- rows.append(row)
-
- meta_html = format_eval_info_html(selected_leaderboard)
-
- if not rows:
- full_df = pd.DataFrame(columns=["Model", "Developer", "Params (B)", "Arch", "Precision", "Score"])
- else:
- full_df = pd.DataFrame(rows)
- numeric_cols = full_df.select_dtypes(include=['float', 'int']).columns
- full_df[numeric_cols] = full_df[numeric_cols].round(3)
-
- LEADERBOARD_CACHE[selected_leaderboard] = (full_df, meta_html)
-
- # Filter by search query
- df = full_df.copy()
- if search_query:
- df = df[
- df["Model"].str.contains(search_query, case=False, na=False) |
- df["Developer"].str.contains(search_query, case=False, na=False)
- ]
-
- # Group by model and average scores if requested
- if group_by_model and not df.empty:
- # Identify grouping columns (non-numeric usually, or specific base cols)
- # We group by the base identifiers.
- base_cols_all = ["Model", "Developer", "Params (B)", "Arch", "Precision"]
- group_cols = [c for c in base_cols_all if c in df.columns]
-
- # Identify columns to average (numeric)
- numeric_cols = df.select_dtypes(include=['number']).columns
- # Exclude group_cols from numeric_cols if they happen to be numeric (like Params)
- # But groupby keys can be numeric.
- # We want to average the SCORES.
- # Any numeric column NOT in group_cols should be averaged.
- agg_cols = [c for c in numeric_cols if c not in group_cols]
-
- if group_cols and agg_cols:
- df = df.groupby(group_cols)[agg_cols].mean().reset_index()
- df = df.round(3)
-
- # Drop columns where all values are null
- df = df.dropna(axis=1, how='all')
-
- if df.empty:
- return df, meta_html
-
- # Filter base_cols to only include columns that exist in df (in case some were dropped)
- base_cols = [c for c in ["Model", "Developer", "Params (B)", "Arch", "Precision"] if c in df.columns]
- eval_cols = [c for c in df.columns if c not in base_cols]
-
- cols = base_cols + eval_cols
- return df[cols], meta_html
-
-def find_json_files(path):
- """Recursively finds all JSON files in a directory or returns the file if it's a JSON file."""
- json_files = []
- path_obj = Path(path)
-
- if path_obj.is_file() and path_obj.suffix == ".json":
- json_files.append(path_obj)
- elif path_obj.is_dir():
- json_files.extend(path_obj.rglob("*.json"))
-
- return json_files
-
-def check_is_duplicate(save_dir, new_eval_id):
- """Checks if a file with the same evaluation_id already exists in the directory."""
- if not new_eval_id or not save_dir.exists():
- return False
-
- for existing_file in save_dir.glob("*.json"):
- try:
- with open(existing_file, 'r') as f:
- data = json.load(f)
- if data.get("evaluation_id") == new_eval_id:
- return True
- except:
- continue
- return False
-
-def handle_file_upload(files, progress=gr.Progress()):
- """Processes uploaded files/folders and saves them to the correct structure.
+ metadata = get_eval_metadata(selected_leaderboard)
- Structure: Leaderboard/Provider/Model/.json
- Preserves original filename (which already contains the UUID).
- """
- if not files:
- return gr.update(), "No files uploaded."
+ def progress_callback(value, desc):
+ progress(value, desc=desc)
- saved_count = 0
- all_json_files = []
- skipped_count = 0
- duplicate_count = 0
+ df = build_leaderboard_table(selected_leaderboard, "", progress_callback)
+ total_count = len(df)
- progress(0, desc="Scanning files...")
- for file_obj in files:
- path = file_obj.name if hasattr(file_obj, "name") else file_obj
- json_files = find_json_files(path)
-
- if Path(path).is_file() and Path(path).suffix != ".json":
- skipped_count += 1
-
- all_json_files.extend(json_files)
+ # Apply search filter (searches all columns)
+ if search_query and not df.empty:
+ mask = df.astype(str).apply(lambda row: row.str.contains(search_query, case=False, na=False).any(), axis=1)
+ df = df[mask]
- total_files = len(all_json_files)
- for i, json_file in enumerate(all_json_files):
- progress((i / total_files), desc=f"Processing {json_file.name}...")
- try:
- parsed = parse_eval_json(json_file)
- if not parsed:
- continue
-
- leaderboard = normalize_leaderboard_name(parsed["leaderboard"])
- provider = parsed["provider"]
- model_id = parsed["model"]
- developer = parsed["developer"]
- eval_id = parsed["raw_data"].get("evaluation_id")
-
- # Sanitize names for directory structure
- sanitized_provider = sanitize_filename_component(developer)
- sanitized_model = sanitize_filename_component(model_id)
-
- # Create structure: Leaderboard/Developer/Model
- save_dir = DATA_DIR / leaderboard / sanitized_provider / sanitized_model
- save_dir.mkdir(parents=True, exist_ok=True)
-
- # Check for duplicates based on evaluation_id
- if check_is_duplicate(save_dir, eval_id):
- duplicate_count += 1
- continue
-
- # Preserve original filename
- filename = json_file.name
- save_path = save_dir / filename
-
- # Avoid overwriting by appending counter
- counter = 1
- while save_path.exists():
- stem = save_path.stem.rsplit('_', 1)[0] if '_' in save_path.stem else save_path.stem
- save_path = save_dir / f"{stem}_{counter}.json"
- counter += 1
-
- with open(save_path, 'w') as f:
- json.dump(parsed["raw_data"], f, indent=2)
-
- saved_count += 1
-
- except Exception as e:
- print(f"Failed to save {json_file}: {e}")
-
- # Clear cache since data changed
- LEADERBOARD_CACHE.clear()
-
- # Refresh leaderboard choices
- choices = get_available_leaderboards()
+ # Build search status message
+ if search_query:
+ search_msg = f"Showing {len(df)} of {total_count} results for '{search_query}'"
+ else:
+ search_msg = f"Showing {len(df)} results"
- msg_parts = [f"Processed {saved_count} files."]
- if duplicate_count > 0:
- msg_parts.append(f"Skipped {duplicate_count} duplicates.")
- if skipped_count > 0:
- msg_parts.append(f"Skipped {skipped_count} non-JSON files.")
-
- return gr.Dropdown(choices=choices), " ".join(msg_parts), None, None
+ return df, search_msg, format_leaderboard_header(selected_leaderboard, metadata), format_metric_details(selected_leaderboard, metadata)
-# Professional, high-contrast theme
-theme = gr.themes.Soft(
- primary_hue="slate",
- neutral_hue="slate",
- font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"]
-).set(
- body_background_fill="var(--neutral-50)",
- block_background_fill="white",
- block_border_width="1px",
- block_title_text_weight="600"
-)
-
-css = """
-/* Clean up the global container */
-.gradio-container {
- max-width: 100% !important;
- padding: 0 2rem !important;
-}
-
-/* Table Styles */
-.dataframe {
- border: 1px solid var(--border-color-primary) !important;
- border-radius: 8px;
-}
-/* Hide file list in uploaders */
-.file-preview {
- display: none !important;
-}
-"""
+# Load HF dataset BEFORE building the interface
+load_hf_dataset_on_startup()
-with gr.Blocks(title="Eval Leaderboard", theme=theme, css=css) as demo:
+# Build Gradio interface
+with gr.Blocks(title="Eval Leaderboard", theme=get_theme(), css=get_custom_css()) as demo:
with gr.Row(variant="compact", elem_classes="header-row"):
with gr.Column(scale=1):
gr.Markdown("# đ Evaluation Leaderboard")
gr.Markdown("Analyze and compare model performance metrics.", elem_classes="subtitle")
- with gr.Row(variant="panel", equal_height=True):
+ with gr.Row(variant="panel"):
initial_choices = get_available_leaderboards()
initial_value = initial_choices[0] if initial_choices else None
@@ -482,56 +109,51 @@ with gr.Blocks(title="Eval Leaderboard", theme=theme, css=css) as demo:
choices=initial_choices,
value=initial_value,
label="Current Leaderboard",
- interactive=True,
- container=False,
- scale=1
- )
- with gr.Column(scale=2):
- search_box = gr.Textbox(
- label="Search Model/Developer",
- placeholder="đ Search model or developer...",
- show_label=False,
- container=False,
- scale=1
- )
- with gr.Column(scale=1, min_width=100):
- group_by_model = gr.Checkbox(
- label="Average by Model",
- value=False,
- container=False
+ interactive=True
)
- with gr.Column(scale=1, min_width=100):
+ with gr.Column(scale=3):
+ search_box = gr.Textbox(
+ label="Search",
+ placeholder="Type to search across all columns...",
+ show_label=False
+ )
+ with gr.Column(scale=1):
refresh_btn = gr.Button("đ Refresh", variant="secondary", size="sm")
- with gr.Accordion("đ¤ Upload New Data", open=False):
- upload_mode = gr.Radio(
- choices=["Files", "Folder"],
- value="Files",
- label="Upload Mode",
- info="Choose 'Files' for individual JSONs, or 'Folder' to upload a directory structure."
- )
-
- with gr.Group(visible=True) as file_upload_group:
- file_uploader_files = gr.File(
- file_count="multiple",
- file_types=[".json"],
- label="Select JSON Files"
- )
-
- with gr.Group(visible=False) as folder_upload_group:
- file_uploader_folder = gr.File(
- file_count="directory",
- label="Select Folder"
- )
-
- upload_status = gr.Textbox(
- label="Upload Status",
- interactive=False
- )
+ with gr.Accordion("âšī¸ How to Submit Data", open=False):
+ gr.Markdown("""
+### Submitting Evaluation Data
- init_df, init_meta = update_leaderboard_table(initial_value)
+**Data submissions happen via GitHub Pull Requests:**
+
+1. **Fork** [evaleval/every_eval_ever](https://github.com/evaleval/every_eval_ever)
+2. **Add your JSON files** to `data////`
+3. **Create a Pull Request**
+4. **Automated validation** checks your data
+5. **After merge**: GitHub Actions automatically syncs to HuggingFace
+6. **Refresh this page** to see your data!
+
+#### File Structure
+```
+data/
+└── YourBenchmark/
+    └── developer_name/
+        └── model_name/
+            └── {uuid}.json
+```
+
+Each JSON file should follow the schema and be named with a unique UUID.
+
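+A minimal illustrative file (the model, scores, and URL below are hypothetical placeholders, not a real submission):
+
+```json
+{
+  "schema_version": "0.0.1",
+  "evaluation_id": "your_org/your_benchmark/1700000000.0",
+  "retrieved_timestamp": "1700000000.0",
+  "source_data": ["https://example.com/your-leaderboard"],
+  "evaluation_source": {
+    "evaluation_source_name": "YourBenchmark",
+    "evaluation_source_type": "leaderboard"
+  },
+  "source_metadata": {
+    "source_organization_name": "Your Org",
+    "evaluator_relationship": "third_party"
+  },
+  "model_info": {
+    "name": "your-org/your-model",
+    "id": "your-org/your-model",
+    "developer": "your-org"
+  },
+  "evaluation_results": [
+    {
+      "evaluation_name": "YourEval",
+      "metric_config": {
+        "evaluation_description": "Accuracy on YourEval",
+        "lower_is_better": false,
+        "score_type": "continuous",
+        "min_score": 0,
+        "max_score": 1
+      },
+      "score_details": {"score": 0.75}
+    }
+  ]
+}
+```
+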
+📖 [**Full Submission Guide**](https://github.com/evaleval/every_eval_ever#contributor-guide) |
+📋 [**JSON Schema**](https://github.com/evaleval/every_eval_ever/blob/main/eval.schema.json) |
+📂 [**See Examples**](https://github.com/evaleval/every_eval_ever/tree/main/data)
+ """)
+
+ init_df, init_search_msg, init_header, init_metrics = update_leaderboard_table(initial_value)
+
+ header_view = gr.HTML(value=init_header)
- metadata_view = gr.HTML(value=init_meta)
+ search_info = gr.Markdown(value=init_search_msg)
leaderboard_table = gr.Dataframe(
value=init_df,
@@ -541,59 +163,33 @@ with gr.Blocks(title="Eval Leaderboard", theme=theme, css=css) as demo:
elem_classes="dataframe"
)
- def toggle_upload_input(mode):
- return {
- file_upload_group: gr.Group(visible=(mode == "Files")),
- folder_upload_group: gr.Group(visible=(mode == "Folder"))
- }
+ metrics_view = gr.HTML(value=init_metrics)
- upload_mode.change(
- fn=toggle_upload_input,
- inputs=[upload_mode],
- outputs=[file_upload_group, folder_upload_group]
- )
-
- file_uploader_files.upload(
- fn=handle_file_upload,
- inputs=[file_uploader_files],
- outputs=[leaderboard_selector, upload_status, file_uploader_files, file_uploader_folder]
- )
-
- file_uploader_folder.upload(
- fn=handle_file_upload,
- inputs=[file_uploader_folder],
- outputs=[leaderboard_selector, upload_status, file_uploader_files, file_uploader_folder]
- )
+ # Event handlers
leaderboard_selector.change(
fn=update_leaderboard_table,
- inputs=[leaderboard_selector, search_box, group_by_model],
- outputs=[leaderboard_table, metadata_view]
- )
-
- search_box.change(
- fn=update_leaderboard_table,
- inputs=[leaderboard_selector, search_box, group_by_model],
- outputs=[leaderboard_table, metadata_view]
+ inputs=[leaderboard_selector, search_box],
+ outputs=[leaderboard_table, search_info, header_view, metrics_view]
)
- group_by_model.change(
- fn=update_leaderboard_table,
- inputs=[leaderboard_selector, search_box, group_by_model],
- outputs=[leaderboard_table, metadata_view]
+ search_box.input(
+ fn=update_leaderboard_table,
+ inputs=[leaderboard_selector, search_box],
+ outputs=[leaderboard_table, search_info, header_view, metrics_view]
)
refresh_btn.click(
- fn=lambda: (gr.Dropdown(choices=get_available_leaderboards()), "Refreshed."),
- outputs=[leaderboard_selector, upload_status]
+ fn=lambda: gr.Dropdown(choices=get_available_leaderboards()),
+ outputs=[leaderboard_selector]
).then(
- fn=lambda: LEADERBOARD_CACHE.clear()
+ fn=lambda: clear_cache()
).then(
fn=update_leaderboard_table,
- inputs=[leaderboard_selector, search_box, group_by_model],
- outputs=[leaderboard_table, metadata_view]
+ inputs=[leaderboard_selector, search_box],
+ outputs=[leaderboard_table, search_info, header_view, metrics_view]
)
-
+
DATA_DIR.mkdir(exist_ok=True)
if __name__ == "__main__":
diff --git a/data_loader.py b/data_loader.py
new file mode 100644
index 0000000000000000000000000000000000000000..510e9e03d657bd0ebe9f27b9cc44bd47cf2e933a
--- /dev/null
+++ b/data_loader.py
@@ -0,0 +1,317 @@
+"""
+Data Loader: Load from HuggingFace, parse JSON files, and build tables.
+"""
+import json
+import pandas as pd
+from pathlib import Path
+from datasets import load_dataset
+
+
+# Global caches:
+#   HF_DATASET_CACHE maps split name -> list of parsed eval dicts (filled at startup)
+#   LEADERBOARD_CACHE maps leaderboard name -> (DataFrame, meta), built lazily
+HF_DATASET_CACHE = {}
+LEADERBOARD_CACHE = {}
+DATA_DIR = Path("leaderboard_data")
+
+
+def load_hf_dataset_on_startup():
+ """Load all splits from HuggingFace dataset at startup."""
+ print("Loading dataset from HuggingFace...")
+ try:
+ dataset = load_dataset("deepmage121/eee_test")
+
+ for split_name, split_data in dataset.items():
+ print(f"Loading split: {split_name} ({len(split_data)} rows)")
+
+ df = split_data.to_pandas()
+ parsed_items = []
+
+ for _, row in df.iterrows():
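+ # Nested fields (evaluation_results, source_data, additional_details)
+ # are stored as JSON strings in the parquet columns, so decode per row.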
+ evaluation_results = json.loads(row['evaluation_results'])
+
+ results = {}
+ for eval_result in evaluation_results:
+ eval_name = eval_result.get("evaluation_name")
+ score = eval_result.get("score_details", {}).get("score")
+ if eval_name and score is not None:
+ results[eval_name] = score
+
+ additional_details = {}
+ if pd.notna(row.get('additional_details')):
+ additional_details = json.loads(row['additional_details'])
+
+ parsed_item = {
+ "leaderboard": row['_leaderboard'],
+ "provider": row['source_organization_name'],
+ "model": row['model_id'],
+ "developer": row['model_developer'],
+ "params": additional_details.get('params_billions'),
+ "architecture": additional_details.get('architecture', 'Unknown'),
+ "precision": additional_details.get('precision', 'Unknown'),
+ "results": results,
+ "raw_data": {
+ "schema_version": row['schema_version'],
+ "evaluation_id": row['evaluation_id'],
+ "retrieved_timestamp": row['retrieved_timestamp'],
+ "source_data": json.loads(row['source_data']),
+ "evaluation_source": {
+ "evaluation_source_name": row['evaluation_source_name'],
+ "evaluation_source_type": row['evaluation_source_type']
+ },
+ "source_metadata": {
+ "source_organization_name": row['source_organization_name'],
+ "evaluator_relationship": row['evaluator_relationship'],
+ },
+ "model_info": {
+ "name": row['model_name'],
+ "id": row['model_id'],
+ "developer": row['model_developer'],
+ },
+ "evaluation_results": evaluation_results,
+ "additional_details": additional_details
+ }
+ }
+
+ if pd.notna(row.get('source_organization_url')):
+ parsed_item["raw_data"]["source_metadata"]["source_organization_url"] = row['source_organization_url']
+ if pd.notna(row.get('source_organization_logo_url')):
+ parsed_item["raw_data"]["source_metadata"]["source_organization_logo_url"] = row['source_organization_logo_url']
+ if pd.notna(row.get('model_inference_platform')):
+ parsed_item["raw_data"]["model_info"]["inference_platform"] = row['model_inference_platform']
+
+ parsed_items.append(parsed_item)
+
+ HF_DATASET_CACHE[split_name] = parsed_items
+
+ print(f"Loaded {len(HF_DATASET_CACHE)} leaderboard(s) from HuggingFace")
+ return True
+ except Exception as e:
+ print(f"Warning: Could not load HuggingFace dataset: {e}")
+ print("Falling back to local file system...")
+ return False
+
+
+def parse_eval_json(file_path):
+ """Parses a single JSON file to extract model, provider, and results."""
+ try:
+ with open(file_path, 'r') as f:
+ data = json.load(f)
+
+ leaderboard_name = data.get("evaluation_source", {}).get("evaluation_source_name", "Unknown Leaderboard")
+ provider_name = data.get("source_metadata", {}).get("source_organization_name", "Unknown Provider")
+ model_id = data.get("model_info", {}).get("id", "Unknown Model")
+ developer_name = data.get("model_info", {}).get("developer", "Unknown Developer")
+
+ params = data.get("model_info", {}).get("params_billions", None)
+ architecture = data.get("model_info", {}).get("architecture", "Unknown")
+ precision = data.get("additional_details", {}).get("precision", "Unknown")
+ if precision == "Unknown":
+ precision = data.get("model_info", {}).get("precision", "Unknown")
+
+ results = {}
+ if "evaluation_results" in data:
+ for res in data["evaluation_results"]:
+ eval_name = res.get("evaluation_name", "Unknown Metric")
+ score = res.get("score_details", {}).get("score", None)
+ if score is not None:
+ results[eval_name] = score
+
+ return {
+ "leaderboard": leaderboard_name,
+ "provider": provider_name,
+ "model": model_id,
+ "developer": developer_name,
+ "params": params,
+ "architecture": architecture,
+ "precision": precision,
+ "results": results,
+ "raw_data": data
+ }
+ except Exception as e:
+ print(f"Error parsing {file_path}: {e}")
+ return None
+
+
+def get_available_leaderboards():
+ """Returns available leaderboards from HF cache or local directory."""
+ if HF_DATASET_CACHE:
+ return list(HF_DATASET_CACHE.keys())
+
+ if not DATA_DIR.exists():
+ return []
+ return [d.name for d in DATA_DIR.iterdir() if d.is_dir()]
+
+
+def walk_eval_files(leaderboard_name):
+ """Generator that walks through Leaderboard directory recursively."""
+ lb_path = DATA_DIR / leaderboard_name
+ if not lb_path.exists():
+ return
+ yield from lb_path.rglob("*.json")
+
+
+def get_eval_metadata(selected_leaderboard):
+ """Extracts evaluation metadata from the leaderboard data."""
+ if not selected_leaderboard:
+ return {}
+
+ eval_metadata = {"evals": {}, "source_info": {}}
+
+ if selected_leaderboard in HF_DATASET_CACHE:
+ parsed_items = HF_DATASET_CACHE[selected_leaderboard]
+ if parsed_items:
+ parsed = parsed_items[0]
+
+ source_meta = parsed["raw_data"].get("source_metadata", {})
+ source_data_list = parsed["raw_data"].get("source_data", [])
+ url = source_data_list[0] if isinstance(source_data_list, list) and source_data_list else "#"
+
+ eval_metadata["source_info"] = {
+ "organization": source_meta.get("source_organization_name", "Unknown"),
+ "relationship": source_meta.get("evaluator_relationship", "Unknown"),
+ "url": url
+ }
+
+ if "evaluation_results" in parsed["raw_data"]:
+ for res in parsed["raw_data"]["evaluation_results"]:
+ eval_name = res.get("evaluation_name", "Unknown Metric")
+ if eval_name not in eval_metadata["evals"]:
+ metric_config = res.get("metric_config", {})
+ eval_metadata["evals"][eval_name] = {
+ "description": metric_config.get("evaluation_description", "No description available"),
+ "score_type": metric_config.get("score_type", "unknown"),
+ "lower_is_better": metric_config.get("lower_is_better", False),
+ "min_score": metric_config.get("min_score"),
+ "max_score": metric_config.get("max_score"),
+ "level_names": metric_config.get("level_names", []),
+ "level_metadata": metric_config.get("level_metadata", []),
+ "has_unknown_level": metric_config.get("has_unknown_level", False)
+ }
+ return eval_metadata
+
+ # Fall back to file system
+ for json_file in walk_eval_files(selected_leaderboard):
+ parsed = parse_eval_json(json_file)
+ if parsed:
+ if not eval_metadata["source_info"]:
+ source_meta = parsed["raw_data"].get("source_metadata", {})
+ source_data_list = parsed["raw_data"].get("source_data", [])
+ url = source_data_list[0] if isinstance(source_data_list, list) and source_data_list else "#"
+
+ eval_metadata["source_info"] = {
+ "organization": source_meta.get("source_organization_name", "Unknown"),
+ "relationship": source_meta.get("evaluator_relationship", "Unknown"),
+ "url": url
+ }
+
+ if "evaluation_results" in parsed["raw_data"]:
+ for res in parsed["raw_data"]["evaluation_results"]:
+ eval_name = res.get("evaluation_name", "Unknown Metric")
+ if eval_name not in eval_metadata["evals"]:
+ metric_config = res.get("metric_config", {})
+ eval_metadata["evals"][eval_name] = {
+ "description": metric_config.get("evaluation_description", "No description available"),
+ "score_type": metric_config.get("score_type", "unknown"),
+ "lower_is_better": metric_config.get("lower_is_better", False),
+ "min_score": metric_config.get("min_score"),
+ "max_score": metric_config.get("max_score"),
+ "level_names": metric_config.get("level_names", []),
+ "level_metadata": metric_config.get("level_metadata", []),
+ "has_unknown_level": metric_config.get("has_unknown_level", False)
+ }
+ break
+
+ return eval_metadata
+
+
+def build_leaderboard_table(selected_leaderboard, search_query="", progress_callback=None):
+ """Builds the leaderboard DataFrame from cache or files."""
+ if not selected_leaderboard:
+ return pd.DataFrame()
+
+ if selected_leaderboard in LEADERBOARD_CACHE:
+ df, _ = LEADERBOARD_CACHE[selected_leaderboard]
+ else:
+ rows = []
+
+ if selected_leaderboard in HF_DATASET_CACHE:
+ if progress_callback:
+ progress_callback(0, desc=f"Loading {selected_leaderboard} from cache...")
+
+ parsed_items = HF_DATASET_CACHE[selected_leaderboard]
+
+ for i, parsed in enumerate(parsed_items):
+ if i % 100 == 0 and progress_callback:
+ progress_callback((i / len(parsed_items)), desc=f"Processing {selected_leaderboard}...")
+
+ row = {
+ "Model": parsed["model"],
+ "Developer": parsed["developer"],
+ "Params (B)": parsed["params"],
+ "Arch": parsed["architecture"],
+ "Precision": parsed["precision"]
+ }
+ row.update(parsed["results"])
+ rows.append(row)
+ else:
+ # Fall back to file system
+ if progress_callback:
+ progress_callback(0, desc=f"Scanning {selected_leaderboard}...")
+
+ all_files = list(walk_eval_files(selected_leaderboard))
+ total_files = len(all_files)
+
+ for i, json_file in enumerate(all_files):
+ if i % 100 == 0 and progress_callback:
+ progress_callback((i / total_files), desc=f"Loading {selected_leaderboard}...")
+
+ parsed = parse_eval_json(json_file)
+ if parsed:
+ row = {
+ "Model": parsed["model"],
+ "Developer": parsed["developer"],
+ "Params (B)": parsed["params"],
+ "Arch": parsed["architecture"],
+ "Precision": parsed["precision"]
+ }
+ row.update(parsed["results"])
+ rows.append(row)
+
+ if not rows:
+ df = pd.DataFrame(columns=["Model", "Developer", "Params (B)", "Arch", "Precision"])
+ LEADERBOARD_CACHE[selected_leaderboard] = (df, None)
+ return df
+
+ df = pd.DataFrame(rows)
+ df = df.dropna(axis=1, how='all')
+
+ if df.empty:
+ LEADERBOARD_CACHE[selected_leaderboard] = (df, None)
+ return df
+
+ numeric_cols = df.select_dtypes(include=['float', 'int']).columns
+ df[numeric_cols] = df[numeric_cols].round(3)
+
+ # Average = unweighted mean across eval score columns (Params (B) excluded)
+ eval_only_cols = [c for c in numeric_cols if c not in ["Params (B)"]]
+ if len(eval_only_cols) > 0:
+ df["Average"] = df[eval_only_cols].mean(axis=1).round(3)
+
+ base_cols = ["Model", "Developer", "Params (B)", "Arch", "Precision", "Average"]
+ eval_cols = [c for c in df.columns if c not in base_cols]
+ base_cols = [c for c in base_cols if c in df.columns]
+
+ final_cols = base_cols + sorted(eval_cols)
+ df = df[final_cols]
+
+ if "Average" in df.columns:
+ df = df.sort_values("Average", ascending=False)
+
+ LEADERBOARD_CACHE[selected_leaderboard] = (df, None)
+
+ return df
+
+
+def clear_cache():
+ """Clears all caches."""
+ LEADERBOARD_CACHE.clear()
+
diff --git a/eval.schema.json b/eval.schema.json
new file mode 100644
index 0000000000000000000000000000000000000000..4be0e6ff925ed642124d84e36d4e5e467c71060e
--- /dev/null
+++ b/eval.schema.json
@@ -0,0 +1,282 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema#",
+ "version": "0.0.1",
+ "type": "object",
+ "description": "Schema for storing and validating LLMs evaluation data, including model configuration, prompts, instances, Output, and evaluation metrics",
+ "required": [
+ "schema_version",
+ "evaluation_id",
+ "evaluation_source",
+ "retrieved_timestamp",
+ "source_data",
+ "source_metadata",
+ "model_info",
+ "evaluation_results"
+ ],
+ "properties": {
+ "schema_version": {
+ "type": "string",
+ "description": "Version of the schema used for this evaluation data"
+ },
+ "evaluation_id": {
+ "type": "string",
+ "description": "Unique identifier for this specific evaluation run. Use org_name/eval_name/retrieved_timestamp format"
+ },
+ "retrieved_timestamp": {
+ "type": "string",
+ "description": "Timestamp for when this record was created"
+ },
+ "source_data": {
+ "type": "array",
+ "description": "URLs for the source of the evaluation data",
+ "items": {
+ "type": "string"
+ }
+ },
+ "evaluation_source": {
+ "type": "object",
+ "description": "Details about evaluation origin. There are options that evaluations come from leaderboards (e.g. Live Code Bench Pro) or evaluation platforms (e.g. lm-eval, inspect ai, HELM...).",
+ "required": [
+ "evaluation_source_name",
+ "evaluation_source_type"
+ ],
+ "properties": {
+ "evaluation_source_name": {
+ "type": "string",
+ "description": "Name of the source (e.g. title of the source leaderboard or name of the platform used for the evaluation."
+ },
+ "evaluation_source_type": {
+ "type": "string",
+ "enum": [
+ "leaderboard",
+ "evaluation_platform"
+ ],
+ "description": "Type of evaluation source, e.g., leaderboard or evaluation platform"
+ }
+ }
+ },
+ "source_metadata": {
+ "type": "object",
+ "description": "Metadata about the source of the leaderboard data",
+ "required": [
+ "source_organization_name",
+ "evaluator_relationship"
+ ],
+ "properties": {
+ "source_organization_name": {
+ "type": "string",
+ "description": "Name of the organization that provides the data"
+ },
+ "source_organization_url": {
+ "type": "string",
+ "description": "URL for the organization that provides the data"
+ },
+ "source_organization_logo_url": {
+ "type": "string",
+ "description": "URL for the Logo for the organization that provides the data"
+ },
+ "evaluator_relationship": {
+ "type": "string",
+ "description": "Relationship between the evaluator and the model",
+ "enum": [
+ "first_party",
+ "third_party",
+ "collaborative",
+ "other"
+ ]
+ }
+ }
+ },
+ "model_info": {
+ "type": "object",
+ "description": "Complete model specification including basic information, technical configuration and inference settings",
+ "required": [
+ "name",
+ "id"
+ ],
+ "properties": {
+ "name": {
+ "type": "string",
+ "description": "Model name provided by evaluation source"
+ },
+ "id": {
+ "type": "string",
+ "description": "Model name standarized to HuggingFace format (e.g. meta-llama/Llama-3.1-8B-Instruct)"
+ },
+ "developer": {
+ "type": "string",
+ "description": "Name of organization that provides the model (e.g. 'OpenAI')"
+ },
+ "inference_platform": {
+ "type": "string",
+ "description": "Description of platform used to run the evaluations (e.g. local machine, Bedrock)"
+ }
+ }
+ },
+ "evaluation_results": {
+ "type": "array",
+ "description": "Array of evaluation results",
+ "items": {
+ "type": "object",
+ "required": [
+ "evaluation_name",
+ "metric_config",
+ "score_details"
+ ],
+ "properties": {
+ "evaluation_name": {
+ "type": "string",
+ "description": "Name of the evaluation"
+ },
+ "evaluation_timestamp": {
+ "type": "string",
+ "description": "Timestamp for when the evaluations were run"
+ },
+ "metric_config": {
+ "type": "object",
+ "description": "Details about the metric",
+ "required": [
+ "lower_is_better"
+ ],
+ "properties": {
+ "evaluation_description": {
+ "type": "string",
+ "description": "Description of the evaluation"
+ },
+ "lower_is_better": {
+ "type": "boolean",
+ "description": "Whether a lower score is better"
+ },
+ "score_type": {
+ "type": "string",
+ "description": "Type of score",
+ "enum": [
+ "binary",
+ "continuous",
+ "levels"
+ ]
+ },
+ "level_names": {
+ "type": "array",
+ "description": "Names of the score levels",
+ "items": {
+ "type": "string"
+ }
+ },
+ "level_metadata": {
+ "type": "array",
+ "description": "Additional Description for each Score Level",
+ "items": {
+ "type": "string"
+ }
+ },
+ "has_unknown_level": {
+ "type": "boolean",
+ "description": "Indicates whether there is an Unknown Level - if True, then a score of -1 will be treated as Unknown"
+ },
+ "min_score": {
+ "type": "number",
+ "description": "Minimum possible score for continuous metric"
+ },
+ "max_score": {
+ "type": "number",
+ "description": "Maximum possible score for continuous metric"
+ }
+ },
+ "if": {
+ "properties": {
+ "score_type": {
+ "const": "levels"
+ }
+ }
+ },
+ "then": {
+ "required": [
+ "level_names",
+ "has_unknown_level"
+ ]
+ },
+ "else": {
+ "if": {
+ "properties": {
+ "score_type": {
+ "const": "continuous"
+ }
+ }
+ },
+ "then": {
+ "required": [
+ "min_score",
+ "max_score"
+ ]
+ }
+ }
+ },
+ "score_details": {
+ "type": "object",
+ "description": "The score for the evaluation and related details",
+ "required": [
+ "score"
+ ],
+ "properties": {
+ "score": {
+ "type": "number",
+ "description": "The score for the evaluation"
+ },
+ "details": {
+ "type": "object",
+ "description": "Any additional details about the score",
+ "additionalProperties": true
+ }
+ }
+ },
+ "detailed_evaluation_results_url": {
+ "type": "string",
+ "description": "Link to detailed evaluation data"
+ },
+ "generation_config": {
+ "type": "object",
+ "generation_args": {
+ "type": "object",
+ "description": "Parameters used to generate results - properties may vary by model type",
+ "properties": {
+ "temperature": {
+ "type": [
+ "null",
+ "number"
+ ],
+ "description": "Sampling temperature"
+ },
+ "top_p": {
+ "type": [
+ "null",
+ "number"
+ ],
+ "description": "Nucleus sampling parameter"
+ },
+ "top_k": {
+ "type": [
+ "null",
+ "number"
+ ],
+ "description": "Top-k sampling parameter"
+ },
+ "max_tokens": {
+ "type": "integer",
+ "minimum": 1,
+ "description": "Maximum number of tokens to generate"
+ }
+ },
+ "additionalProperties": true
+ },
+ "additional_details": {
+ "type": "string",
+ "description": "Additional details about how the results for this metric were generated."
+ }
+ }
+ }
+ }
+ }
+
+ }
+ }
+}
diff --git a/hf_operations.py b/hf_operations.py
new file mode 100644
index 0000000000000000000000000000000000000000..18d0fc4eb6b2e1cc40336d3686ddd027adeaac66
--- /dev/null
+++ b/hf_operations.py
@@ -0,0 +1,202 @@
+"""
+HuggingFace Operations: Upload data, create PRs, validate schemas.
+"""
+from huggingface_hub import HfApi
+import pandas as pd
+import json
+from pathlib import Path
+from jsonschema import validate, ValidationError
+
+
+# Load schema once at module level
+SCHEMA_PATH = Path(__file__).parent / "eval.schema.json"
+with open(SCHEMA_PATH, 'r') as f:
+ EVAL_SCHEMA = json.load(f)
+
+
+def validate_json_against_schema(json_data):
+ """
+ Validate a JSON object against eval.schema.json.
+
+ Args:
+ json_data: Dict containing the evaluation data
+
+ Returns:
+ (bool, str): (is_valid, error_message)
+ """
+ try:
+ validate(instance=json_data, schema=EVAL_SCHEMA)
+ return True, "Schema validation passed"
+ except ValidationError as e:
+ # Extract the most relevant error message
+ error_path = " â ".join(str(p) for p in e.path) if e.path else "root"
+ return False, f"â Schema validation failed at '{error_path}': {e.message}"
+ except Exception as e:
+ return False, f"â Validation error: {str(e)}"
+
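+# Example usage (the file path here is hypothetical):
+# with open("data/MyBench/my-dev/my-model/1234.json") as f:
+# ok, msg = validate_json_against_schema(json.load(f))
+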
+
+def upload_to_hf_dataset(parquet_file, split_name, repo_id="deepmage121/eee_test"):
+ """
+ Upload a parquet file as a new split to the HF dataset.
+
+ Args:
+ parquet_file: Path to parquet file
+ split_name: Name of the split (leaderboard name)
+ repo_id: HuggingFace dataset repository ID
+ """
+ # TODO: Implement upload logic
+ pass
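+
+# A possible sketch for the upload (untested; the data/{split_name}.parquet
+# path layout is an assumption mirroring create_pr_for_new_leaderboard below):
+#
+# api = HfApi()
+# api.upload_file(
+# path_or_fileobj=str(parquet_file),
+# path_in_repo=f"data/{split_name}.parquet",
+# repo_id=repo_id,
+# repo_type="dataset",
+# commit_message=f"Update split: {split_name}",
+# )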
+
+
+def check_hf_authentication():
+ """
+ Check if user is authenticated with HuggingFace.
+
+ Returns:
+ (bool, str): (is_authenticated, username or error_message)
+ """
+ try:
+ api = HfApi()
+ user_info = api.whoami()
+ return True, user_info['name']
+ except Exception:
+ return False, "Not authenticated. Run: huggingface-cli login"
+
+
+def check_duplicate_pr_exists(leaderboard_name, repo_id="deepmage121/eee_test"):
+ """
+ Check if a PR already exists for this leaderboard.
+
+ Args:
+ leaderboard_name: Name of the leaderboard
+ repo_id: HuggingFace dataset repository ID
+
+ Returns:
+ (bool, str or None): (exists, pr_url if exists)
+ """
+ try:
+ api = HfApi()
+ discussions = api.get_repo_discussions(repo_id=repo_id, repo_type="dataset")
+
+ # Check for open PRs with matching title
+ pr_title_pattern = f"add new leaderboard: {leaderboard_name.lower()}"
+ for discussion in discussions:
+ if discussion.is_pull_request and discussion.status == "open":
+ if pr_title_pattern in discussion.title.lower():
+ pr_url = f"https://huggingface.co/datasets/{repo_id}/discussions/{discussion.num}"
+ return True, pr_url
+
+ return False, None
+ except Exception as e:
+ # If we can't check, assume no duplicate (fail open)
+ print(f"Warning: Could not check for duplicate PRs: {e}")
+ return False, None
+
+
+def create_pr_for_new_leaderboard(leaderboard_name, parquet_file, repo_id="deepmage121/eee_test"):
+ """
+ Create a pull request to add a new leaderboard split.
+
+ Args:
+ leaderboard_name: Name of the new leaderboard
+ parquet_file: Path to parquet file
+ repo_id: HuggingFace dataset repository ID
+
+ Returns:
+ (success, pr_url or error_message)
+ """
+ # 1. Check authentication
+ is_auth, auth_result = check_hf_authentication()
+ if not is_auth:
+ return False, f"â {auth_result}"
+
+ # 2. Check for duplicate PR
+ has_duplicate, duplicate_url = check_duplicate_pr_exists(leaderboard_name, repo_id)
+ if has_duplicate:
+ return False, f"â ī¸ PR already exists: {duplicate_url}"
+
+ # 3. Validate parquet file exists and has data
+ parquet_path = Path(parquet_file)
+ if not parquet_path.exists():
+ return False, "â Parquet file not found"
+
+ df = pd.read_parquet(parquet_file)
+ if len(df) == 0:
+ return False, "â Parquet file is empty"
+
+ # 4. Create PR
+ try:
+ api = HfApi()
+
+ # Upload the parquet file to the branch
+ commit_message = f"Add new leaderboard: {leaderboard_name}"
+
+ # Upload file and create PR
+ commit_info = api.upload_file(
+ path_or_fileobj=parquet_file,
+ path_in_repo=f"data/{leaderboard_name}.parquet",
+ repo_id=repo_id,
+ repo_type="dataset",
+ commit_message=commit_message,
+ create_pr=True,
+ )
+
+ # Extract PR URL from commit info
+ pr_url = commit_info.pr_url if hasattr(commit_info, 'pr_url') else f"https://huggingface.co/datasets/{repo_id}/discussions"
+
+ return True, f"PR created ({len(df)} rows): {pr_url}"
+
+ except Exception as e:
+ return False, f"â Failed to create PR: {str(e)}"
+
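+# Example call (the output path here is hypothetical):
+# ok, msg = create_pr_for_new_leaderboard("MyBench", "parquet_output/MyBench.parquet")
+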
+
+def validate_schema(parquet_file):
+ """
+ Validate that a parquet file matches the expected schema.
+
+ Args:
+ parquet_file: Path to parquet file to validate
+
+ Returns:
+ (bool, str): (is_valid, error_message)
+ """
+ try:
+ df = pd.read_parquet(parquet_file)
+
+ # Required columns
+ required_cols = [
+ '_leaderboard', '_developer', '_model', '_uuid',
+ 'schema_version', 'evaluation_id', 'retrieved_timestamp',
+ 'source_data', 'evaluation_source_name', 'evaluation_source_type',
+ 'source_organization_name', 'evaluator_relationship',
+ 'model_name', 'model_id', 'model_developer',
+ 'evaluation_results'
+ ]
+
+ missing = [col for col in required_cols if col not in df.columns]
+ if missing:
+ return False, f"Missing required columns: {', '.join(missing)}"
+
+ # Check data types (all should be strings)
+ for col in df.columns:
+ if df[col].dtype not in ['object', 'string']:
+ return False, f"Column '{col}' has wrong type: {df[col].dtype} (expected string)"
+
+ return True, "Schema validation passed"
+
+ except Exception as e:
+ return False, f"Validation error: {str(e)}"
+
+
+def export_to_json(parquet_file, output_dir):
+ """
+ Export parquet data back to JSON files.
+ Uses the parquet_to_folder function from json_to_parquet.py
+
+ Args:
+ parquet_file: Path to parquet file
+ output_dir: Directory to write JSON files to
+ """
+ from json_to_parquet import parquet_to_folder
+ parquet_to_folder(parquet_file, output_dir)
+
diff --git a/json_to_parquet.py b/json_to_parquet.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1b701fa8a354831c11579e49042235dd8982b94
--- /dev/null
+++ b/json_to_parquet.py
@@ -0,0 +1,228 @@
+"""
+JSON <-> Parquet conversion: flatten each eval JSON into one Parquet row
+(nested structures stored as JSON strings) and rebuild the folder tree on export.
+"""
+import json
+from pathlib import Path
+import pandas as pd
+
+
+def json_to_row(json_path: Path) -> dict:
+ """Convert one JSON to a single row (1 JSON = 1 row, evaluations as columns)."""
+ with open(json_path, 'r') as f:
+ data = json.load(f)
+
+ required_fields = ["schema_version", "evaluation_id", "evaluation_source", "retrieved_timestamp",
+ "source_data", "source_metadata", "model_info", "evaluation_results"]
+ for field in required_fields:
+ if field not in data:
+ raise ValueError(f"{json_path}: Missing required field '{field}'")
+
+ if "evaluation_source_name" not in data["evaluation_source"]:
+ raise ValueError(f"{json_path}: Missing required field 'evaluation_source.evaluation_source_name'")
+ if "evaluation_source_type" not in data["evaluation_source"]:
+ raise ValueError(f"{json_path}: Missing required field 'evaluation_source.evaluation_source_type'")
+
+ if "source_organization_name" not in data["source_metadata"]:
+ raise ValueError(f"{json_path}: Missing required field 'source_metadata.source_organization_name'")
+ if "evaluator_relationship" not in data["source_metadata"]:
+ raise ValueError(f"{json_path}: Missing required field 'source_metadata.evaluator_relationship'")
+
+ if "name" not in data["model_info"]:
+ raise ValueError(f"{json_path}: Missing required field 'model_info.name'")
+ if "id" not in data["model_info"]:
+ raise ValueError(f"{json_path}: Missing required field 'model_info.id'")
+ if "developer" not in data["model_info"]:
+ raise ValueError(f"{json_path}: Missing required field 'model_info.developer'")
+
+ leaderboard = data["evaluation_source"]["evaluation_source_name"]
+ model = data["model_info"]["id"]
+ uuid = json_path.stem
+ developer = data["model_info"]["developer"]
+
+ # Validate evaluation results
+ for eval_result in data["evaluation_results"]:
+ if "evaluation_name" not in eval_result:
+ raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].evaluation_name'")
+ if "metric_config" not in eval_result:
+ raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].metric_config'")
+ if "score_details" not in eval_result:
+ raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].score_details'")
+
+ if "lower_is_better" not in eval_result["metric_config"]:
+ raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].metric_config.lower_is_better'")
+ if "score" not in eval_result["score_details"]:
+ raise ValueError(f"{json_path}: Missing required field 'evaluation_results[].score_details.score'")
+
+ row = {
+ # Folder structure (for reconstruction)
+ "_leaderboard": leaderboard,
+ "_developer": developer,
+ "_model": model,
+ "_uuid": uuid,
+
+ # Required top-level fields
+ "schema_version": data["schema_version"],
+ "evaluation_id": data["evaluation_id"],
+ "retrieved_timestamp": data["retrieved_timestamp"],
+ "source_data": json.dumps(data["source_data"]),
+
+ # Required nested fields
+ "evaluation_source_name": data["evaluation_source"]["evaluation_source_name"],
+ "evaluation_source_type": data["evaluation_source"]["evaluation_source_type"],
+
+ "source_organization_name": data["source_metadata"]["source_organization_name"],
+ "source_organization_url": data["source_metadata"].get("source_organization_url"),
+ "source_organization_logo_url": data["source_metadata"].get("source_organization_logo_url"),
+ "evaluator_relationship": data["source_metadata"]["evaluator_relationship"],
+
+ "model_name": data["model_info"]["name"],
+ "model_id": data["model_info"]["id"],
+ "model_developer": data["model_info"]["developer"],
+ "model_inference_platform": data["model_info"].get("inference_platform"),
+
+ # Store full evaluation_results and additional_details as JSON
+ "evaluation_results": json.dumps(data["evaluation_results"]),
+ "additional_details": json.dumps(data["additional_details"]) if "additional_details" in data else None,
+ }
+
+ return row
+
+
+def add_to_parquet(json_or_folder: str, parquet_file: str):
+ """
+ Add JSON(s) to Parquet file.
+ Creates new file if it doesn't exist, appends and deduplicates if it does.
+
+ Args:
+ json_or_folder: Path to single JSON file or folder containing JSONs
+ parquet_file: Output Parquet file path
+ """
+ input_path = Path(json_or_folder)
+
+ if input_path.is_file():
+ json_files = [input_path]
+ elif input_path.is_dir():
+ json_files = list(input_path.rglob("*.json"))
+ if not json_files:
+ raise ValueError(f"No JSON files found in directory: {json_or_folder}")
+ else:
+ raise ValueError(f"Invalid input: {json_or_folder}")
+
+ print(f"Processing {len(json_files)} JSON file(s)...")
+
+ parquet_path = Path(parquet_file)
+ if parquet_path.exists():
+ existing_df = pd.read_parquet(parquet_file)
+ existing_keys = set(
+ existing_df[["_leaderboard", "_developer", "_model", "_uuid"]]
+ .apply(tuple, axis=1)
+ )
+ print(f"Found {len(existing_df)} existing rows")
+ else:
+ existing_df = None
+ existing_keys = set()
+
+ all_rows = []
+ skipped = 0
+ for i, jf in enumerate(json_files, 1):
+ if i % 100 == 0:
+ print(f" {i}/{len(json_files)}")
+
+ row = json_to_row(jf)
+ key = (row["_leaderboard"], row["_developer"], row["_model"], row["_uuid"])
+ if key not in existing_keys:
+ all_rows.append(row)
+ existing_keys.add(key)
+ else:
+ skipped += 1
+
+ if skipped > 0:
+ print(f" Skipped {skipped} duplicate file(s)")
+
+ # Handle case where no new rows to add
+ if not all_rows:
+ if existing_df is not None:
+ print(f"No new files to add, keeping existing {len(existing_df)} file(s)")
+ return
+ else:
+ raise ValueError("No valid JSON files to process and no existing parquet file")
+
+ new_df = pd.DataFrame(all_rows)
+
+ if existing_df is not None:
+ df = pd.concat([existing_df, new_df], ignore_index=True)
+ print(f"Added {len(new_df)} new file(s) to existing {len(existing_df)} file(s)")
+ else:
+ df = new_df
+
+ df.to_parquet(parquet_file, index=False)
+ print(f"Saved {len(df)} total file(s) to {parquet_file} ({parquet_path.stat().st_size / 1024 / 1024:.1f} MB)")
+
+
+def parquet_to_folder(parquet_file: str, output_dir: str):
+ """Reconstruct folder structure from Parquet."""
+ df = pd.read_parquet(parquet_file)
+ out = Path(output_dir)
+
+ for _, row in df.iterrows():
+ lb = row["_leaderboard"]
+ dev = row["_developer"]
+ model = row["_model"]
+ uuid = row["_uuid"]
+
+ json_data = {
+ "schema_version": row["schema_version"],
+ "evaluation_id": row["evaluation_id"],
+ "retrieved_timestamp": row["retrieved_timestamp"],
+ "source_data": json.loads(row["source_data"]),
+ "evaluation_source": {
+ "evaluation_source_name": row["evaluation_source_name"],
+ "evaluation_source_type": row["evaluation_source_type"]
+ },
+ "source_metadata": {
+ "source_organization_name": row["source_organization_name"],
+ "evaluator_relationship": row["evaluator_relationship"]
+ },
+ "model_info": {
+ "name": row["model_name"],
+ "id": row["model_id"],
+ "developer": row["model_developer"]
+ },
+ "evaluation_results": json.loads(row["evaluation_results"])
+ }
+
+ if pd.notna(row["source_organization_url"]):
+ json_data["source_metadata"]["source_organization_url"] = row["source_organization_url"]
+ if pd.notna(row["source_organization_logo_url"]):
+ json_data["source_metadata"]["source_organization_logo_url"] = row["source_organization_logo_url"]
+
+ if pd.notna(row["model_inference_platform"]):
+ json_data["model_info"]["inference_platform"] = row["model_inference_platform"]
+
+ if pd.notna(row["additional_details"]):
+ json_data["additional_details"] = json.loads(row["additional_details"])
+
+ file_path = out / lb / dev / model / f"{uuid}.json"
+ file_path.parent.mkdir(parents=True, exist_ok=True)
+ with open(file_path, 'w') as f:
+ json.dump(json_data, f, indent=2)
+
+ print(f"Reconstructed {len(df)} files to {output_dir}")
+
+
+if __name__ == "__main__":
+ import sys
+
+ if len(sys.argv) < 2:
+ print("Usage:")
+ print(" python json_to_parquet.py add ")
+ print(" python json_to_parquet.py export ")
+ sys.exit(1)
+
+ cmd = sys.argv[1]
+
+ if cmd == "add":
+ add_to_parquet(sys.argv[2], sys.argv[3])
+ elif cmd == "export":
+ parquet_to_folder(sys.argv[2], sys.argv[3])
+ else:
+ print(f"Unknown command: {cmd}")
diff --git a/leaderboard_data/HFOpenLLMv2/0-hero/0-hero_Matter-0.2-7B-DPO/40e80d5e-db72-46b7-bd14-b7d005df4be8.json b/leaderboard_data/HFOpenLLMv2/0-hero/0-hero_Matter-0.2-7B-DPO/40e80d5e-db72-46b7-bd14-b7d005df4be8.json
deleted file mode 100644
index 13d42abffb5b6dec5b881d249e70ecf1598aaeae..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/0-hero/0-hero_Matter-0.2-7B-DPO/40e80d5e-db72-46b7-bd14-b7d005df4be8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/0-hero_Matter-0.2-7B-DPO/1762652579.4626381",
- "retrieved_timestamp": "1762652579.462642",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "0-hero/Matter-0.2-7B-DPO",
- "developer": "0-hero",
- "inference_platform": "unknown",
- "id": "0-hero/Matter-0.2-7B-DPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3302792147058693
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3596254301656297
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.014350453172205438
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.381375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1163563829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-32K/0d91a153-1b6b-4891-8722-a5c7e372ba64.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-32K/0d91a153-1b6b-4891-8722-a5c7e372ba64.json
deleted file mode 100644
index 80547e421e154b508a24aebfce93d4238b937691..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-32K/0d91a153-1b6b-4891-8722-a5c7e372ba64.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B-32K/1762652579.463656",
- "retrieved_timestamp": "1762652579.463657",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-1.5-34B-32K",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-1.5-34B-32K"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3118691737922047
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6015685776542417
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1540785498489426
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36325503355704697
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4398229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4709109042553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-Chat-16K/2192007d-1f6e-4f74-b518-7448ef3a896e.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-Chat-16K/2192007d-1f6e-4f74-b518-7448ef3a896e.json
deleted file mode 100644
index d0dd58d6ee88c61b554b2e20e6cd035cf66c34a4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-Chat-16K/2192007d-1f6e-4f74-b518-7448ef3a896e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B-Chat-16K/1762652579.464125",
- "retrieved_timestamp": "1762652579.4641259",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-1.5-34B-Chat-16K",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-1.5-34B-Chat-16K"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.456449997118756
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6100218256499571
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21374622356495468
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43976041666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45445478723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-Chat/e335874b-9b3e-4966-a7e0-22e9d16f8324.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-Chat/e335874b-9b3e-4966-a7e0-22e9d16f8324.json
deleted file mode 100644
index e872cb56c27fa3d3098d01803b28e9be7dbd8b1c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B-Chat/e335874b-9b3e-4966-a7e0-22e9d16f8324.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B-Chat/1762652579.463886",
- "retrieved_timestamp": "1762652579.4638872",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-1.5-34B-Chat",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-1.5-34B-Chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6066758423205982
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6083748310271819
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.277190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3649328859060403
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4281979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45204454787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B/8409c158-ef12-4e6c-8a1d-7be2084b3446.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B/8409c158-ef12-4e6c-8a1d-7be2084b3446.json
deleted file mode 100644
index 09588b7620fa9279af91f885eb3775bc4b3ee9f9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-34B/8409c158-ef12-4e6c-8a1d-7be2084b3446.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-34B/1762652579.4633532",
- "retrieved_timestamp": "1762652579.463354",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-1.5-34B",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-1.5-34B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2841172533322695
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5976391706360018
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15332326283987915
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36577181208053694
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4236041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4665890957446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-6B-Chat/3452e57f-3023-4e2e-ad84-b09e409fe334.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-6B-Chat/3452e57f-3023-4e2e-ad84-b09e409fe334.json
deleted file mode 100644
index 7d05d24e5b234d149a660e3f6fcf983a780fdcb5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-6B-Chat/3452e57f-3023-4e2e-ad84-b09e409fe334.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-6B-Chat/1762652579.464571",
- "retrieved_timestamp": "1762652579.464572",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-1.5-6B-Chat",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-1.5-6B-Chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5145270105542183
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4571311331954389
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1623867069486405
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43917708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3193151595744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.061
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-6B/1a1f1263-96b6-4e32-a2c8-6c0d6b47dff9.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-6B/1a1f1263-96b6-4e32-a2c8-6c0d6b47dff9.json
deleted file mode 100644
index 802fda80e1c556a6e6c2f86cb20923f65ca6c80f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-6B/1a1f1263-96b6-4e32-a2c8-6c0d6b47dff9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-6B/1762652579.464354",
- "retrieved_timestamp": "1762652579.464355",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-1.5-6B",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-1.5-6B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26166017278598563
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44925820198929056
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43740625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31441156914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.061
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-32K/df9d9d44-daa1-4e61-9b46-192380043889.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-32K/df9d9d44-daa1-4e61-9b46-192380043889.json
deleted file mode 100644
index 9e8fb948cee6a95485a44cadd575e54f755f41a7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-32K/df9d9d44-daa1-4e61-9b46-192380043889.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B-32K/1762652579.4649951",
- "retrieved_timestamp": "1762652579.464996",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-1.5-9B-32K",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-1.5-9B-32K"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23031113002389217
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.496332115988265
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10800604229607251
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35906040268456374
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4186145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37649601063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.829
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-Chat-16K/090c9691-4b7e-4a98-b9a2-644e21797be4.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-Chat-16K/090c9691-4b7e-4a98-b9a2-644e21797be4.json
deleted file mode 100644
index 9f8095e138984210fb35cd26340a79758a1b12a6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-Chat-16K/090c9691-4b7e-4a98-b9a2-644e21797be4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B-Chat-16K/1762652579.465471",
- "retrieved_timestamp": "1762652579.465471",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-1.5-9B-Chat-16K",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-1.5-9B-Chat-16K"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4214040966856829
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5153383364651778
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1782477341389728
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40990624999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39935172872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.829
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-Chat/9256c32b-d956-418f-97da-ea78e3ad9e48.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-Chat/9256c32b-d956-418f-97da-ea78e3ad9e48.json
deleted file mode 100644
index ff3ac391bcccc9fe3eee9293aeedb40aff1fb3bc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B-Chat/9256c32b-d956-418f-97da-ea78e3ad9e48.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B-Chat/1762652579.465226",
- "retrieved_timestamp": "1762652579.465226",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-1.5-9B-Chat",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-1.5-9B-Chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6045525871354672
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.555906430281685
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2258308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3347315436241611
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42590625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39752327127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.829
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B/904d1f91-3153-49d5-afd3-9921bfc086f1.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B/904d1f91-3153-49d5-afd3-9921bfc086f1.json
deleted file mode 100644
index 9a4e6bc7a3662a1c66d580025c652df1dae25728..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-1.5-9B/904d1f91-3153-49d5-afd3-9921bfc086f1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-1.5-9B/1762652579.464781",
- "retrieved_timestamp": "1762652579.464782",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-1.5-9B",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-1.5-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29358435617494916
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.514294179104191
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11404833836858005
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37919463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43278124999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3916223404255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.829
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B-200K/fb2ebd9a-f5b8-42a2-9b58-e6f0e7d9b98a.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B-200K/fb2ebd9a-f5b8-42a2-9b58-e6f0e7d9b98a.json
deleted file mode 100644
index 5655666ddc0b371f6f2d8b95b176f1b8807ad32f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B-200K/fb2ebd9a-f5b8-42a2-9b58-e6f0e7d9b98a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-34B-200K/1762652579.465893",
- "retrieved_timestamp": "1762652579.465894",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-34B-200K",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-34B-200K"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15424850507763843
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5441817925289527
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05740181268882175
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3565436241610738
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38171874999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45345744680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B-Chat/5d9b9217-874b-426d-8af4-5105a3b1b3ad.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B-Chat/5d9b9217-874b-426d-8af4-5105a3b1b3ad.json
deleted file mode 100644
index b31034cb091a3f033e623a876161ecd928080812..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B-Chat/5d9b9217-874b-426d-8af4-5105a3b1b3ad.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-34B-Chat/1762652579.466115",
- "retrieved_timestamp": "1762652579.4661162",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-34B-Chat",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-34B-Chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4698887839820565
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5560872910766164
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06268882175226587
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39784375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4093251329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B/3ebcbf3d-cb2d-4332-bb8a-1db104033391.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B/3ebcbf3d-cb2d-4332-bb8a-1db104033391.json
deleted file mode 100644
index b6ff74f37afa9da113b821db4eecd9ebd7f9877f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-34B/3ebcbf3d-cb2d-4332-bb8a-1db104033391.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-34B/1762652579.4656792",
- "retrieved_timestamp": "1762652579.46568",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-34B",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-34B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3045751938190667
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5457099951794562
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36661073825503354
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4118541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.441156914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B-200K/6b720e8b-aab8-4ba4-9bce-e7a1de3cfb86.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B-200K/6b720e8b-aab8-4ba4-9bce-e7a1de3cfb86.json
deleted file mode 100644
index 992afa07503d5e311dac51ec373d85c721355d5c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B-200K/6b720e8b-aab8-4ba4-9bce-e7a1de3cfb86.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-6B-200K/1762652579.4665558",
- "retrieved_timestamp": "1762652579.466557",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-6B-200K",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-6B-200K"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08433068702154728
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42892948109603307
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01812688821752266
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28187919463087246
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45873958333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2844082446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.061
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B-Chat/1120c801-7736-4d9d-b23d-08eeedb34186.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B-Chat/1120c801-7736-4d9d-b23d-08eeedb34186.json
deleted file mode 100644
index 791c74d16759b5d384bb9850098575eee927cdbc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B-Chat/1120c801-7736-4d9d-b23d-08eeedb34186.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-6B-Chat/1762652579.466805",
- "retrieved_timestamp": "1762652579.466806",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-6B-Chat",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-6B-Chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33952135888331847
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41326019207548687
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.013595166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36879166666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3061003989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.061
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B/297419fa-855c-4eae-ad7c-3cf4a0262450.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B/297419fa-855c-4eae-ad7c-3cf4a0262450.json
deleted file mode 100644
index fc94c92f131b6fd66dc3adf0ee7fbc6cb5d1e6d1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-6B/297419fa-855c-4eae-ad7c-3cf4a0262450.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-6B/1762652579.4663382",
- "retrieved_timestamp": "1762652579.4663382",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-6B",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-6B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28933784580468713
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4309230591000865
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015861027190332326
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39368749999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29911901595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.061
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-9B-200K/4299df04-495a-4687-b143-96b1b562d5e8.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-9B-200K/4299df04-495a-4687-b143-96b1b562d5e8.json
deleted file mode 100644
index 08d337e8165a0cb928d3e015bec507e4bbabb20b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-9B-200K/4299df04-495a-4687-b143-96b1b562d5e8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-9B-200K/1762652579.467233",
- "retrieved_timestamp": "1762652579.467233",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-9B-200K",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-9B-200K"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23270921155866434
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4793302602023641
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31543624161073824
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42940625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36220079787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.829
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-9B/0ec59add-f9a9-4dbd-8a83-c6aec0b8ad21.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-9B/0ec59add-f9a9-4dbd-8a83-c6aec0b8ad21.json
deleted file mode 100644
index 6bafb1fe4e4497ec973917b08944bbbb864902dd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-9B/0ec59add-f9a9-4dbd-8a83-c6aec0b8ad21.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-9B/1762652579.46702",
- "retrieved_timestamp": "1762652579.4670231",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-9B",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2708779372066118
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49396075125308075
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.055891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179530201342282
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40540624999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35738031914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.829
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-Coder-9B-Chat/ef0cc3a5-0d62-4a45-b0c7-28a6f7dfdac4.json b/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-Coder-9B-Chat/ef0cc3a5-0d62-4a45-b0c7-28a6f7dfdac4.json
deleted file mode 100644
index ea42c85e8c6e20238dd372bf7561d806af618d60..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/01-ai/01-ai_Yi-Coder-9B-Chat/ef0cc3a5-0d62-4a45-b0c7-28a6f7dfdac4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/01-ai_Yi-Coder-9B-Chat/1762652579.4674509",
- "retrieved_timestamp": "1762652579.4674518",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "01-ai/Yi-Coder-9B-Chat",
- "developer": "01-ai",
- "inference_platform": "unknown",
- "id": "01-ai/Yi-Coder-9B-Chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4817041006750976
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48142000339111674
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04003021148036254
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24748322147651006
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3991770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24251994680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.829
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/1-800-LLMs/1-800-LLMs_Qwen-2.5-14B-Hindi-Custom-Instruct/a48b0864-76b7-4860-a448-942a8d74f68e.json b/leaderboard_data/HFOpenLLMv2/1-800-LLMs/1-800-LLMs_Qwen-2.5-14B-Hindi-Custom-Instruct/a48b0864-76b7-4860-a448-942a8d74f68e.json
deleted file mode 100644
index 6bc93d8c248fcee6de4f98e6940a93b0d8ab8e24..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/1-800-LLMs/1-800-LLMs_Qwen-2.5-14B-Hindi-Custom-Instruct/a48b0864-76b7-4860-a448-942a8d74f68e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/1-800-LLMs_Qwen-2.5-14B-Hindi-Custom-Instruct/1762652579.468073",
- "retrieved_timestamp": "1762652579.468074",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct",
- "developer": "1-800-LLMs",
- "inference_platform": "unknown",
- "id": "1-800-LLMs/Qwen-2.5-14B-Hindi-Custom-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30774677854758703
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6284322714967584
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311178247734139
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3699664429530201
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4490625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.516373005319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/152334H/152334H_miqu-1-70b-sf/f57d7b8d-85d5-4e0b-8dec-31e2931487dd.json b/leaderboard_data/HFOpenLLMv2/152334H/152334H_miqu-1-70b-sf/f57d7b8d-85d5-4e0b-8dec-31e2931487dd.json
deleted file mode 100644
index 8b265339c8235a8aeb6c70e5c84c8ccead9aa3cc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/152334H/152334H_miqu-1-70b-sf/f57d7b8d-85d5-4e0b-8dec-31e2931487dd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/152334H_miqu-1-70b-sf/1762652579.469194",
- "retrieved_timestamp": "1762652579.469195",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "152334H/miqu-1-70b-sf",
- "developer": "152334H",
- "inference_platform": "unknown",
- "id": "152334H/miqu-1-70b-sf"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5181740005407873
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6102361685099691
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12462235649546828
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35067114093959734
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45820833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42278922872340424
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 68.977
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/1TuanPham/1TuanPham_T-VisStar-7B-v0.1/1347cd1b-2ebc-4223-900f-7c2479e228a3.json b/leaderboard_data/HFOpenLLMv2/1TuanPham/1TuanPham_T-VisStar-7B-v0.1/1347cd1b-2ebc-4223-900f-7c2479e228a3.json
deleted file mode 100644
index 4e3d74e80449b3f2267bcf508b5c2ee7c169702f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/1TuanPham/1TuanPham_T-VisStar-7B-v0.1/1347cd1b-2ebc-4223-900f-7c2479e228a3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/1TuanPham_T-VisStar-7B-v0.1/1762652579.469481",
- "retrieved_timestamp": "1762652579.469482",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "1TuanPham/T-VisStar-7B-v0.1",
- "developer": "1TuanPham",
- "inference_platform": "unknown",
- "id": "1TuanPham/T-VisStar-7B-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36070404305021786
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5052203113352468
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05740181268882175
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3210605053191489
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.294
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/1TuanPham/1TuanPham_T-VisStar-v0.1/b2926dd6-628c-4274-b0e8-1efc64269bb2.json b/leaderboard_data/HFOpenLLMv2/1TuanPham/1TuanPham_T-VisStar-v0.1/b2926dd6-628c-4274-b0e8-1efc64269bb2.json
deleted file mode 100644
index c479ea18409a706ef7ef8eba7db256cdc3459334..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/1TuanPham/1TuanPham_T-VisStar-v0.1/b2926dd6-628c-4274-b0e8-1efc64269bb2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/1TuanPham_T-VisStar-v0.1/1762652579.469921",
- "retrieved_timestamp": "1762652579.469923",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "1TuanPham/T-VisStar-v0.1",
- "developer": "1TuanPham",
- "inference_platform": "unknown",
- "id": "1TuanPham/T-VisStar-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36070404305021786
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5052203113352468
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05740181268882175
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3210605053191489
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.294
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/3rd-Degree-Burn/3rd-Degree-Burn_L-3.1-Science-Writer-8B/0c4fd071-b5c9-4bf1-a1d5-d658be1a3258.json b/leaderboard_data/HFOpenLLMv2/3rd-Degree-Burn/3rd-Degree-Burn_L-3.1-Science-Writer-8B/0c4fd071-b5c9-4bf1-a1d5-d658be1a3258.json
deleted file mode 100644
index d5171a37caa6daa46fba3351cd16213633d0a162..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/3rd-Degree-Burn/3rd-Degree-Burn_L-3.1-Science-Writer-8B/0c4fd071-b5c9-4bf1-a1d5-d658be1a3258.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/3rd-Degree-Burn_L-3.1-Science-Writer-8B/1762652579.470164",
- "retrieved_timestamp": "1762652579.470165",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "3rd-Degree-Burn/L-3.1-Science-Writer-8B",
- "developer": "3rd-Degree-Burn",
- "inference_platform": "unknown",
- "id": "3rd-Degree-Burn/L-3.1-Science-Writer-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42625012743963797
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5041306326216103
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10347432024169184
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3959479166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36494348404255317
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/4season/4season_final_model_test_v2/74973e37-cd82-4e8a-816a-02b035fabff4.json b/leaderboard_data/HFOpenLLMv2/4season/4season_final_model_test_v2/74973e37-cd82-4e8a-816a-02b035fabff4.json
deleted file mode 100644
index 864adc997ad1c4dbbc4a39bad72c6909d695c171..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/4season/4season_final_model_test_v2/74973e37-cd82-4e8a-816a-02b035fabff4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/4season_final_model_test_v2/1762652579.4714398",
- "retrieved_timestamp": "1762652579.4714408",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "4season/final_model_test_v2",
- "developer": "4season",
- "inference_platform": "unknown",
- "id": "4season/final_model_test_v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3191132860809319
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6342049783295018
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08383685800604229
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3271812080536913
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4314479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3528091755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 21.421
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/AALF/AALF_FuseChat-Llama-3.1-8B-Instruct-preview/3766e8a0-99ad-4733-a01b-ced446b15eda.json b/leaderboard_data/HFOpenLLMv2/AALF/AALF_FuseChat-Llama-3.1-8B-Instruct-preview/3766e8a0-99ad-4733-a01b-ced446b15eda.json
deleted file mode 100644
index aa5ed6ea3e10809249d9a3d226a303a3d8e47760..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/AALF/AALF_FuseChat-Llama-3.1-8B-Instruct-preview/3766e8a0-99ad-4733-a01b-ced446b15eda.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AALF_FuseChat-Llama-3.1-8B-Instruct-preview/1762652579.471838",
- "retrieved_timestamp": "1762652579.471839",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AALF/FuseChat-Llama-3.1-8B-Instruct-preview",
- "developer": "AALF",
- "inference_platform": "unknown",
- "id": "AALF/FuseChat-Llama-3.1-8B-Instruct-preview"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7189579205397235
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5119887898349903
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24773413897280966
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38200000000000006
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3732546542553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/AALF/AALF_FuseChat-Llama-3.1-8B-SFT-preview/342ac912-805f-4166-b8f4-10f0503fa892.json b/leaderboard_data/HFOpenLLMv2/AALF/AALF_FuseChat-Llama-3.1-8B-SFT-preview/342ac912-805f-4166-b8f4-10f0503fa892.json
deleted file mode 100644
index 832cd58cda82acbcd1ee342ddea5c37885e0a536..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/AALF/AALF_FuseChat-Llama-3.1-8B-SFT-preview/342ac912-805f-4166-b8f4-10f0503fa892.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AALF_FuseChat-Llama-3.1-8B-SFT-preview/1762652579.472149",
- "retrieved_timestamp": "1762652579.47215",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AALF/FuseChat-Llama-3.1-8B-SFT-preview",
- "developer": "AALF",
- "inference_platform": "unknown",
- "id": "AALF/FuseChat-Llama-3.1-8B-SFT-preview"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7280504616639405
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5240303130445233
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22507552870090636
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40199999999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37433510638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/AGI-0/AGI-0_Art-v0-3B/162b6d5f-f983-4989-9603-f6baea26b633.json b/leaderboard_data/HFOpenLLMv2/AGI-0/AGI-0_Art-v0-3B/162b6d5f-f983-4989-9603-f6baea26b633.json
deleted file mode 100644
index 1b54cc9efcea78b4e962f9fd13ba9de61bcb55c6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/AGI-0/AGI-0_Art-v0-3B/162b6d5f-f983-4989-9603-f6baea26b633.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AGI-0_Art-v0-3B/1762652579.473539",
- "retrieved_timestamp": "1762652579.47354",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AGI-0/Art-v0-3B",
- "developer": "AGI-0",
- "inference_platform": "unknown",
- "id": "AGI-0/Art-v0-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.319238509377341
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3400959483013824
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24622356495468278
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3768229166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11785239361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/AI-MO/AI-MO_NuminaMath-7B-CoT/9ac2ba3c-9a21-46b2-a21c-4909cfae6315.json b/leaderboard_data/HFOpenLLMv2/AI-MO/AI-MO_NuminaMath-7B-CoT/9ac2ba3c-9a21-46b2-a21c-4909cfae6315.json
deleted file mode 100644
index cf802649493e5d01f917b41b3b91a0b739007589..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/AI-MO/AI-MO_NuminaMath-7B-CoT/9ac2ba3c-9a21-46b2-a21c-4909cfae6315.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AI-MO_NuminaMath-7B-CoT/1762652579.474318",
- "retrieved_timestamp": "1762652579.4743192",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AI-MO/NuminaMath-7B-CoT",
- "developer": "AI-MO",
- "inference_platform": "unknown",
- "id": "AI-MO/NuminaMath-7B-CoT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2688544173903022
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4314193495860012
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26963746223564955
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33034375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28681848404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.91
- }
-}
\ No newline at end of file
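
Because every metric_config in these records uses min_score 0, max_score 1, and lower_is_better false, a plain unweighted mean over a record's scores is well-defined. The sketch below shows that computation; it is illustrative only, and the official leaderboard aggregate may weight or normalize differently:

import json
from pathlib import Path

def mean_score(record: dict) -> float:
    """Unweighted mean of the benchmark scores in one record (sketch)."""
    scores = [r["score_details"]["score"]
              for r in record.get("evaluation_results", [])
              if r.get("score_details", {}).get("score") is not None]
    return sum(scores) / len(scores) if scores else float("nan")

# For the AI-MO/NuminaMath-7B-CoT record above, the six listed scores
# average to roughly 0.309:
# record = json.loads(Path("leaderboard_data/HFOpenLLMv2/AI-MO/"
#     "AI-MO_NuminaMath-7B-CoT/9ac2ba3c-9a21-46b2-a21c-4909cfae6315.json").read_text())
# mean_score(record)  # ~0.309
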
diff --git a/leaderboard_data/HFOpenLLMv2/AI-MO/AI-MO_NuminaMath-7B-TIR/0ffa78d4-fe45-4639-bcd1-eb19ab168a35.json b/leaderboard_data/HFOpenLLMv2/AI-MO/AI-MO_NuminaMath-7B-TIR/0ffa78d4-fe45-4639-bcd1-eb19ab168a35.json
deleted file mode 100644
index 8f2466fce1f00d8e6ea8aadb27c886cb4cff3998..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/AI-MO/AI-MO_NuminaMath-7B-TIR/0ffa78d4-fe45-4639-bcd1-eb19ab168a35.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AI-MO_NuminaMath-7B-TIR/1762652579.474566",
- "retrieved_timestamp": "1762652579.474567",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AI-MO/NuminaMath-7B-TIR",
- "developer": "AI-MO",
- "inference_platform": "unknown",
- "id": "AI-MO/NuminaMath-7B-TIR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27562423259174545
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41436913375897894
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1608761329305136
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35092708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2732712765957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.91
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/AI-Sweden-Models/AI-Sweden-Models_Llama-3-8B-instruct/1d68bd2e-de6e-4327-a8f1-33322eba537e.json b/leaderboard_data/HFOpenLLMv2/AI-Sweden-Models/AI-Sweden-Models_Llama-3-8B-instruct/1d68bd2e-de6e-4327-a8f1-33322eba537e.json
deleted file mode 100644
index b8946e8a679d3b6c3a53ee6ee208d33ca44b5d9f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/AI-Sweden-Models/AI-Sweden-Models_Llama-3-8B-instruct/1d68bd2e-de6e-4327-a8f1-33322eba537e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AI-Sweden-Models_Llama-3-8B-instruct/1762652579.474785",
- "retrieved_timestamp": "1762652579.474786",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AI-Sweden-Models/Llama-3-8B-instruct",
- "developer": "AI-Sweden-Models",
- "inference_platform": "unknown",
- "id": "AI-Sweden-Models/Llama-3-8B-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24012841482821137
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4173460154515302
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03851963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47709375000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25972406914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/AI4free/AI4free_Dhanishtha/a554a3eb-943c-4135-966b-929129ef025d.json b/leaderboard_data/HFOpenLLMv2/AI4free/AI4free_Dhanishtha/a554a3eb-943c-4135-966b-929129ef025d.json
deleted file mode 100644
index 833d8203f648056b75d94c7f4c964322825e378d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/AI4free/AI4free_Dhanishtha/a554a3eb-943c-4135-966b-929129ef025d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AI4free_Dhanishtha/1762652579.475332",
- "retrieved_timestamp": "1762652579.475332",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AI4free/Dhanishtha",
- "developer": "AI4free",
- "inference_platform": "unknown",
- "id": "AI4free/Dhanishtha"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2451240486353985
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34039444943326375
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25604229607250756
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2525167785234899
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35694791666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16431183510638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/AI4free/AI4free_t2/332ccdb5-faf5-47c6-afeb-a91d2148adf0.json b/leaderboard_data/HFOpenLLMv2/AI4free/AI4free_t2/332ccdb5-faf5-47c6-afeb-a91d2148adf0.json
deleted file mode 100644
index 802924140cbd165c059621d12120ec7a04d9c9af..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/AI4free/AI4free_t2/332ccdb5-faf5-47c6-afeb-a91d2148adf0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AI4free_t2/1762652579.475577",
- "retrieved_timestamp": "1762652579.475578",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AI4free/t2",
- "developer": "AI4free",
- "inference_platform": "unknown",
- "id": "AI4free/t2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3866828902866616
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2910111436321769
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18957703927492447
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3846354166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11436170212765957
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/AIDC-AI/AIDC-AI_Marco-o1/17f7398f-675d-4b38-b233-64fc106737c3.json b/leaderboard_data/HFOpenLLMv2/AIDC-AI/AIDC-AI_Marco-o1/17f7398f-675d-4b38-b233-64fc106737c3.json
deleted file mode 100644
index 354dbfb93a0b9e5a30df5e169f609fec654ac096..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/AIDC-AI/AIDC-AI_Marco-o1/17f7398f-675d-4b38-b233-64fc106737c3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AIDC-AI_Marco-o1/1762652579.47579",
- "retrieved_timestamp": "1762652579.4757912",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AIDC-AI/Marco-o1",
- "developer": "AIDC-AI",
- "inference_platform": "unknown",
- "id": "AIDC-AI/Marco-o1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.477083028586373
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5364362696398749
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37462235649546827
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41384375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41165226063829785
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Aashraf995/Aashraf995_Creative-7B-nerd/7ea9f4db-5b52-40a5-904e-785e43302934.json b/leaderboard_data/HFOpenLLMv2/Aashraf995/Aashraf995_Creative-7B-nerd/7ea9f4db-5b52-40a5-904e-785e43302934.json
deleted file mode 100644
index 60c612ebe48d7ddd24f4cf7c53bfecfd42c7753b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Aashraf995/Aashraf995_Creative-7B-nerd/7ea9f4db-5b52-40a5-904e-785e43302934.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Aashraf995_Creative-7B-nerd/1762652579.476046",
- "retrieved_timestamp": "1762652579.476046",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Aashraf995/Creative-7B-nerd",
- "developer": "Aashraf995",
- "inference_platform": "unknown",
- "id": "Aashraf995/Creative-7B-nerd"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4721871301480073
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5606785565640195
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3164652567975831
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3263422818791946
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4515416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44921875
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
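
Since this commit removes the per-model JSON files from the repository, a natural companion step is collapsing a directory of them into a single columnar file. A self-contained sketch follows, assuming pandas and pyarrow are available; records_to_parquet and both example paths are hypothetical names, not the repository's actual conversion script:

import json
from pathlib import Path

import pandas as pd

def records_to_parquet(src: Path, dest: Path) -> None:
    """Flatten every evaluation JSON under src into one Parquet file (sketch)."""
    rows = []
    for p in sorted(src.rglob("*.json")):
        data = json.loads(p.read_text())
        row = {"model_id": data.get("model_info", {}).get("id")}
        # One column per benchmark score, keyed by evaluation_name.
        for res in data.get("evaluation_results", []):
            row[res["evaluation_name"]] = res.get("score_details", {}).get("score")
        rows.append(row)
    # to_parquet requires a Parquet engine such as pyarrow.
    pd.DataFrame(rows).to_parquet(dest, index=False)

# records_to_parquet(Path("leaderboard_data/HFOpenLLMv2"),
#                    Path("HFOpenLLMv2.parquet"))
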
diff --git a/leaderboard_data/HFOpenLLMv2/AbacusResearch/AbacusResearch_Jallabi-34B/76397277-901a-4ad0-9dae-0351ca875ec6.json b/leaderboard_data/HFOpenLLMv2/AbacusResearch/AbacusResearch_Jallabi-34B/76397277-901a-4ad0-9dae-0351ca875ec6.json
deleted file mode 100644
index c6cbac5b44ade247d6d8ec32750614dc468a4564..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/AbacusResearch/AbacusResearch_Jallabi-34B/76397277-901a-4ad0-9dae-0351ca875ec6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AbacusResearch_Jallabi-34B/1762652579.477037",
- "retrieved_timestamp": "1762652579.4770381",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AbacusResearch/Jallabi-34B",
- "developer": "AbacusResearch",
- "inference_platform": "unknown",
- "id": "AbacusResearch/Jallabi-34B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3528604103777976
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6023380603196266
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05211480362537765
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3389261744966443
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48217708333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4681682180851064
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Ahdoot/Ahdoot_StructuredThinker-v0.3-MoreStructure/81a5aafb-2cf7-490d-b619-ce638fcc8b38.json b/leaderboard_data/HFOpenLLMv2/Ahdoot/Ahdoot_StructuredThinker-v0.3-MoreStructure/81a5aafb-2cf7-490d-b619-ce638fcc8b38.json
deleted file mode 100644
index a56c303730d937c9b2ce456ef250f14002e8ec08..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Ahdoot/Ahdoot_StructuredThinker-v0.3-MoreStructure/81a5aafb-2cf7-490d-b619-ce638fcc8b38.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Ahdoot_StructuredThinker-v0.3-MoreStructure/1762652579.4772868",
- "retrieved_timestamp": "1762652579.477288",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Ahdoot/StructuredThinker-v0.3-MoreStructure",
- "developer": "Ahdoot",
- "inference_platform": "unknown",
- "id": "Ahdoot/StructuredThinker-v0.3-MoreStructure"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4192808415005519
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48376906494893984
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.290785498489426
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41582291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36103723404255317
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Ahdoot/Ahdoot_Test_StealthThinker/43c907eb-3e43-47ff-b38d-f912ba6ef46c.json b/leaderboard_data/HFOpenLLMv2/Ahdoot/Ahdoot_Test_StealthThinker/43c907eb-3e43-47ff-b38d-f912ba6ef46c.json
deleted file mode 100644
index df4f9bab1e87cda2d9c0ef615b8ef6449dcf04bf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Ahdoot/Ahdoot_Test_StealthThinker/43c907eb-3e43-47ff-b38d-f912ba6ef46c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Ahdoot_Test_StealthThinker/1762652579.4775438",
- "retrieved_timestamp": "1762652579.4775438",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Ahdoot/Test_StealthThinker",
- "developer": "Ahdoot",
- "inference_platform": "unknown",
- "id": "Ahdoot/Test_StealthThinker"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42200361706937595
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46466398134666304
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17900302114803626
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42804166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35970744680851063
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V0-Coder/48732edf-8baf-438e-8a5c-763eee6c0c18.json b/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V0-Coder/48732edf-8baf-438e-8a5c-763eee6c0c18.json
deleted file mode 100644
index 3b8e9b3787b76cca88312b01ea21b83e50b633f5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V0-Coder/48732edf-8baf-438e-8a5c-763eee6c0c18.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V0-Coder/1762652579.478028",
- "retrieved_timestamp": "1762652579.478029",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder",
- "developer": "AicoresSecurity",
- "inference_platform": "unknown",
- "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V0-Coder"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7097656440466851
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4477501104993749
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1487915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34079166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3178191489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V0/38f169f0-e939-4b12-8f78-b2a27fb90de0.json b/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V0/38f169f0-e939-4b12-8f78-b2a27fb90de0.json
deleted file mode 100644
index 9eddb6a2cd89c9ebc27b35f9c2a93e7b317c3111..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V0/38f169f0-e939-4b12-8f78-b2a27fb90de0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V0/1762652579.4777558",
- "retrieved_timestamp": "1762652579.477757",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V0",
- "developer": "AicoresSecurity",
- "inference_platform": "unknown",
- "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6358018945287394
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4497434194912941
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11555891238670694
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33136458333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.301030585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V1.1/e8c63728-a1f5-432f-bf9f-204b0f4041aa.json b/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V1.1/e8c63728-a1f5-432f-bf9f-204b0f4041aa.json
deleted file mode 100644
index cfc591eef06683430fc989011a71370af4a92713..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V1.1/e8c63728-a1f5-432f-bf9f-204b0f4041aa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V1.1/1762652579.478466",
- "retrieved_timestamp": "1762652579.478467",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V1.1",
- "developer": "AicoresSecurity",
- "inference_platform": "unknown",
- "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6730209178313542
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4391775517124728
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17598187311178248
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35409375000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.308843085106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V1/b613ecbe-7b2b-4b03-ab2c-163f9988a8fc.json b/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V1/b613ecbe-7b2b-4b03-ab2c-163f9988a8fc.json
deleted file mode 100644
index 6369ee477ebbc7cceab4052618e261e07657230f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/AicoresSecurity/AicoresSecurity_Cybernet-Sec-3B-R1-V1/b613ecbe-7b2b-4b03-ab2c-163f9988a8fc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AicoresSecurity_Cybernet-Sec-3B-R1-V1/1762652579.478252",
- "retrieved_timestamp": "1762652579.4782531",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AicoresSecurity/Cybernet-Sec-3B-R1-V1",
- "developer": "AicoresSecurity",
- "inference_platform": "unknown",
- "id": "AicoresSecurity/Cybernet-Sec-3B-R1-V1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6145693426774292
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4282342020189216
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15181268882175228
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32869791666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2876496010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Alepach/Alepach_notHumpback-M0/1a4477f7-c414-41ab-bbcb-593f4a86031a.json b/leaderboard_data/HFOpenLLMv2/Alepach/Alepach_notHumpback-M0/1a4477f7-c414-41ab-bbcb-593f4a86031a.json
deleted file mode 100644
index a96d316491db45c484045468d889889ba9a84ee1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Alepach/Alepach_notHumpback-M0/1a4477f7-c414-41ab-bbcb-593f4a86031a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Alepach_notHumpback-M0/1762652579.4786859",
- "retrieved_timestamp": "1762652579.478687",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Alepach/notHumpback-M0",
- "developer": "Alepach",
- "inference_platform": "unknown",
- "id": "Alepach/notHumpback-M0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23500755772461512
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27849287879199425
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0188821752265861
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24916107382550334
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35523958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1118683510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Alepach/Alepach_notHumpback-M1-v2/27c6c36d-6bd5-439b-bdc8-1bd0f8f4c9ea.json b/leaderboard_data/HFOpenLLMv2/Alepach/Alepach_notHumpback-M1-v2/27c6c36d-6bd5-439b-bdc8-1bd0f8f4c9ea.json
deleted file mode 100644
index 02499a3dda39aed6e5cfbf103e0f1ed55b4a1a9b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Alepach/Alepach_notHumpback-M1-v2/27c6c36d-6bd5-439b-bdc8-1bd0f8f4c9ea.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Alepach_notHumpback-M1-v2/1762652579.4791439",
- "retrieved_timestamp": "1762652579.479145",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Alepach/notHumpback-M1-v2",
- "developer": "Alepach",
- "inference_platform": "unknown",
- "id": "Alepach/notHumpback-M1-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2277135777514772
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2775640398406834
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02190332326283988
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3473333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1118683510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Alepach/Alepach_notHumpback-M1/030f17b0-036f-4021-90da-6c1d38da659d.json b/leaderboard_data/HFOpenLLMv2/Alepach/Alepach_notHumpback-M1/030f17b0-036f-4021-90da-6c1d38da659d.json
deleted file mode 100644
index 87a8aec8e8bc54bf85e2eda6ccedaac6b60fa714..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Alepach/Alepach_notHumpback-M1/030f17b0-036f-4021-90da-6c1d38da659d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Alepach_notHumpback-M1/1762652579.478936",
- "retrieved_timestamp": "1762652579.4789371",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Alepach/notHumpback-M1",
- "developer": "Alepach",
- "inference_platform": "unknown",
- "id": "Alepach/notHumpback-M1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2206944241279804
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28824720129981835
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015861027190332326
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23741610738255034
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.342
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10912566489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Alibaba-NLP/Alibaba-NLP_gte-Qwen2-7B-instruct/39ea9329-5ed7-46ea-bcc4-30679a63b405.json b/leaderboard_data/HFOpenLLMv2/Alibaba-NLP/Alibaba-NLP_gte-Qwen2-7B-instruct/39ea9329-5ed7-46ea-bcc4-30679a63b405.json
deleted file mode 100644
index a6ef71f97ffcf14f33c1b756d40f88d04fb23d9a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Alibaba-NLP/Alibaba-NLP_gte-Qwen2-7B-instruct/39ea9329-5ed7-46ea-bcc4-30679a63b405.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Alibaba-NLP_gte-Qwen2-7B-instruct/1762652579.479603",
- "retrieved_timestamp": "1762652579.479604",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Alibaba-NLP/gte-Qwen2-7B-instruct",
- "developer": "Alibaba-NLP",
- "inference_platform": "unknown",
- "id": "Alibaba-NLP/gte-Qwen2-7B-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22554045488193547
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4495144990818469
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06419939577039276
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24496644295302014
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35585416666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33211436170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAI2006/ef37c096-a089-4d3e-9fad-c0f959a18bb3.json b/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAI2006/ef37c096-a089-4d3e-9fad-c0f959a18bb3.json
deleted file mode 100644
index e702ed215d7dda47e1d00065760a93ae40f5ac55..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAI2006/ef37c096-a089-4d3e-9fad-c0f959a18bb3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Amaorynho_BBAI2006/1762652579.480136",
- "retrieved_timestamp": "1762652579.4801369",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Amaorynho/BBAI2006",
- "developer": "Amaorynho",
- "inference_platform": "unknown",
- "id": "Amaorynho/BBAI2006"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14670518668244703
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2704366990167133
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2525167785234899
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3605416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11228390957446809
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.09
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAI270V4/183313de-d526-42a9-a35d-a4e71466e546.json b/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAI270V4/183313de-d526-42a9-a35d-a4e71466e546.json
deleted file mode 100644
index 454e2250788b9f5d9d28ed9add8c13c41b522881..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAI270V4/183313de-d526-42a9-a35d-a4e71466e546.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Amaorynho_BBAI270V4/1762652579.4803882",
- "retrieved_timestamp": "1762652579.4803882",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Amaorynho/BBAI270V4",
- "developer": "Amaorynho",
- "inference_platform": "unknown",
- "id": "Amaorynho/BBAI270V4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1990374428737971
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30712046736502824
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.008308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24580536912751677
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33139583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11136968085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAIIFEV1/7c0342a3-5bd4-47b0-b238-d5dcb0f6236e.json b/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAIIFEV1/7c0342a3-5bd4-47b0-b238-d5dcb0f6236e.json
deleted file mode 100644
index 356439e4d5973180c3fd856b0daf211487842ea7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAIIFEV1/7c0342a3-5bd4-47b0-b238-d5dcb0f6236e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Amaorynho_BBAIIFEV1/1762652579.480599",
- "retrieved_timestamp": "1762652579.4806",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Amaorynho/BBAIIFEV1",
- "developer": "Amaorynho",
- "inference_platform": "unknown",
- "id": "Amaorynho/BBAIIFEV1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8047369867507104
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5292462038560509
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1933534743202417
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4184895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3857214095744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAI_375/ad4b6e40-883c-47c5-ba33-6c112c2c6b09.json b/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAI_375/ad4b6e40-883c-47c5-ba33-6c112c2c6b09.json
deleted file mode 100644
index 3e6868b2b1fd6efbcdd007369f03315e116b848f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Amaorynho/Amaorynho_BBAI_375/ad4b6e40-883c-47c5-ba33-6c112c2c6b09.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Amaorynho_BBAI_375/1762652579.480799",
- "retrieved_timestamp": "1762652579.480799",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Amaorynho/BBAI_375",
- "developer": "Amaorynho",
- "inference_platform": "unknown",
- "id": "Amaorynho/BBAI_375"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14670518668244703
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2704366990167133
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2525167785234899
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3605416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11228390957446809
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.09
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Amu/Amu_t1-1.5B/3e967795-680c-4bfc-906b-eadb969cf2bd.json b/leaderboard_data/HFOpenLLMv2/Amu/Amu_t1-1.5B/3e967795-680c-4bfc-906b-eadb969cf2bd.json
deleted file mode 100644
index 865361cee5ecf4b1777ae668896b0e18b791ae51..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Amu/Amu_t1-1.5B/3e967795-680c-4bfc-906b-eadb969cf2bd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Amu_t1-1.5B/1762652579.481014",
- "retrieved_timestamp": "1762652579.481015",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Amu/t1-1.5B",
- "developer": "Amu",
- "inference_platform": "unknown",
- "id": "Amu/t1-1.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3393717558300864
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4007606984109216
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24328859060402686
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3517083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2566489361702128
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Amu/Amu_t1-3B/c0b7e3e6-4160-4482-af4f-038ae79c7578.json b/leaderboard_data/HFOpenLLMv2/Amu/Amu_t1-3B/c0b7e3e6-4160-4482-af4f-038ae79c7578.json
deleted file mode 100644
index a43460feab59b76238df6ef034f8ad922b265c88..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Amu/Amu_t1-3B/c0b7e3e6-4160-4482-af4f-038ae79c7578.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Amu_t1-3B/1762652579.481272",
- "retrieved_timestamp": "1762652579.4812732",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Amu/t1-3B",
- "developer": "Amu",
- "inference_platform": "unknown",
- "id": "Amu/t1-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33277703160946287
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39989750143834385
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13746223564954682
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2407718120805369
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34348958333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12840757978723405
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ArliAI/ArliAI_ArliAI-RPMax-12B-v1.1/63fc1679-8504-41a0-98d5-2d23aad57b81.json b/leaderboard_data/HFOpenLLMv2/ArliAI/ArliAI_ArliAI-RPMax-12B-v1.1/63fc1679-8504-41a0-98d5-2d23aad57b81.json
deleted file mode 100644
index b2a410e0de208aa5450384b0567b9b6657fd674c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ArliAI/ArliAI_ArliAI-RPMax-12B-v1.1/63fc1679-8504-41a0-98d5-2d23aad57b81.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ArliAI_ArliAI-RPMax-12B-v1.1/1762652579.481497",
- "retrieved_timestamp": "1762652579.481498",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ArliAI/ArliAI-RPMax-12B-v1.1",
- "developer": "ArliAI",
- "inference_platform": "unknown",
- "id": "ArliAI/ArliAI-RPMax-12B-v1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5348852156721942
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.475181760840119
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11253776435045318
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28187919463087246
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36184375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3384308510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Arthur-LAGACHERIE/Arthur-LAGACHERIE_Precis-1B-Instruct/d93c70b5-cb3b-4647-aa47-15c2401f5ebf.json b/leaderboard_data/HFOpenLLMv2/Arthur-LAGACHERIE/Arthur-LAGACHERIE_Precis-1B-Instruct/d93c70b5-cb3b-4647-aa47-15c2401f5ebf.json
deleted file mode 100644
index ab1b37a96d33c276f575587293704cfcc01db046..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Arthur-LAGACHERIE/Arthur-LAGACHERIE_Precis-1B-Instruct/d93c70b5-cb3b-4647-aa47-15c2401f5ebf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Arthur-LAGACHERIE_Precis-1B-Instruct/1762652579.482005",
- "retrieved_timestamp": "1762652579.482006",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Arthur-LAGACHERIE/Precis-1B-Instruct",
- "developer": "Arthur-LAGACHERIE",
- "inference_platform": "unknown",
- "id": "Arthur-LAGACHERIE/Precis-1B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3670738086056109
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3223614510687368
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0037764350453172208
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34355208333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14261968085106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.236
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Artples/Artples_L-MChat-7b/7aeaf034-1c02-4da7-b7b4-9a27ce759601.json b/leaderboard_data/HFOpenLLMv2/Artples/Artples_L-MChat-7b/7aeaf034-1c02-4da7-b7b4-9a27ce759601.json
deleted file mode 100644
index 9a6458ce2e10782e6e79b9984ce67c97c52ca3cb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Artples/Artples_L-MChat-7b/7aeaf034-1c02-4da7-b7b4-9a27ce759601.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Artples_L-MChat-7b/1762652579.482251",
- "retrieved_timestamp": "1762652579.482251",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Artples/L-MChat-7b",
- "developer": "Artples",
- "inference_platform": "unknown",
- "id": "Artples/L-MChat-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5296646231997766
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46003301674679414
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09214501510574018
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4028645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3298703457446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Artples/Artples_L-MChat-Small/0e5a84e3-b90f-4c20-ad58-4d1cf3517f28.json b/leaderboard_data/HFOpenLLMv2/Artples/Artples_L-MChat-Small/0e5a84e3-b90f-4c20-ad58-4d1cf3517f28.json
deleted file mode 100644
index 0c48fa262087b1913c2aefba7dc832a918bcbfc9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Artples/Artples_L-MChat-Small/0e5a84e3-b90f-4c20-ad58-4d1cf3517f28.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Artples_L-MChat-Small/1762652579.4824991",
- "retrieved_timestamp": "1762652579.4825",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Artples/L-MChat-Small",
- "developer": "Artples",
- "inference_platform": "unknown",
- "id": "Artples/L-MChat-Small"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32870561222002065
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48225627665257265
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0377643504531722
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36959375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24642619680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "PhiForCausalLM",
- "params_billions": 2.78
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Aryanne/Aryanne_SHBA/a1c56b87-d8d4-4570-9c33-b84dd066d92f.json b/leaderboard_data/HFOpenLLMv2/Aryanne/Aryanne_SHBA/a1c56b87-d8d4-4570-9c33-b84dd066d92f.json
deleted file mode 100644
index cb58053079b9ee3e52750a60c395ed9c9e3fbe54..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Aryanne/Aryanne_SHBA/a1c56b87-d8d4-4570-9c33-b84dd066d92f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Aryanne_SHBA/1762652579.482961",
- "retrieved_timestamp": "1762652579.482962",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Aryanne/SHBA",
- "developer": "Aryanne",
- "inference_platform": "unknown",
- "id": "Aryanne/SHBA"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7816560060639104
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5233174837035715
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41613541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3892121010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Aryanne/Aryanne_SuperHeart/c6fae489-9bf8-40e5-a602-1c6ce9000537.json b/leaderboard_data/HFOpenLLMv2/Aryanne/Aryanne_SuperHeart/c6fae489-9bf8-40e5-a602-1c6ce9000537.json
deleted file mode 100644
index 9d8c9888df4f63350ae4068437fcb627c155a997..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Aryanne/Aryanne_SuperHeart/c6fae489-9bf8-40e5-a602-1c6ce9000537.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Aryanne_SuperHeart/1762652579.483199",
- "retrieved_timestamp": "1762652579.4832",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Aryanne/SuperHeart",
- "developer": "Aryanne",
- "inference_platform": "unknown",
- "id": "Aryanne/SuperHeart"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5192234382549413
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5215375046264326
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15634441087613293
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44357291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3912067819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Ateron/Ateron_Glowing-Forest-12B/13716fd0-049a-4e9a-90ca-af9db59c1703.json b/leaderboard_data/HFOpenLLMv2/Ateron/Ateron_Glowing-Forest-12B/13716fd0-049a-4e9a-90ca-af9db59c1703.json
deleted file mode 100644
index 600307e405832bb27a6344253b3a43f282fd86c4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Ateron/Ateron_Glowing-Forest-12B/13716fd0-049a-4e9a-90ca-af9db59c1703.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Ateron_Glowing-Forest-12B/1762652579.484101",
- "retrieved_timestamp": "1762652579.4841018",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Ateron/Glowing-Forest-12B",
- "developer": "Ateron",
- "inference_platform": "unknown",
- "id": "Ateron/Glowing-Forest-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3591803082487799
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.549176294722067
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07779456193353475
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33305369127516776
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44490625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37175864361702127
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Ateron/Ateron_Lotus-Magpic/bedab846-a6b2-4c51-9690-27deb7a76fe7.json b/leaderboard_data/HFOpenLLMv2/Ateron/Ateron_Lotus-Magpic/bedab846-a6b2-4c51-9690-27deb7a76fe7.json
deleted file mode 100644
index 3c4642a4e4c9f145e6319ed88f0fcb7f3aa4b068..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Ateron/Ateron_Lotus-Magpic/bedab846-a6b2-4c51-9690-27deb7a76fe7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Ateron_Lotus-Magpic/1762652579.484373",
- "retrieved_timestamp": "1762652579.484374",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Ateron/Lotus-Magpic",
- "developer": "Ateron",
- "inference_platform": "unknown",
- "id": "Ateron/Lotus-Magpic"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6286076499244228
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5253514950133299
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09969788519637462
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4331875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3490691489361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Ateron/Ateron_Way_of_MagPicaro/0a5e585d-1a90-4849-9df5-670a56b9f161.json b/leaderboard_data/HFOpenLLMv2/Ateron/Ateron_Way_of_MagPicaro/0a5e585d-1a90-4849-9df5-670a56b9f161.json
deleted file mode 100644
index 3d5e7b239b629f317859a85da386a3d2a61394bf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Ateron/Ateron_Way_of_MagPicaro/0a5e585d-1a90-4849-9df5-670a56b9f161.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Ateron_Way_of_MagPicaro/1762652579.484595",
- "retrieved_timestamp": "1762652579.484596",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Ateron/Way_of_MagPicaro",
- "developer": "Ateron",
- "inference_platform": "unknown",
- "id": "Ateron/Way_of_MagPicaro"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2637091805298829
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5427386861946704
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3338926174496644
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46490625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35355718085106386
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-4B/5fe88e89-1055-4357-9394-004dd4635e58.json b/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-4B/5fe88e89-1055-4357-9394-004dd4635e58.json
deleted file mode 100644
index 7ad40e181233bdd6f060ea20ced6b98c7993f0cb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-4B/5fe88e89-1055-4357-9394-004dd4635e58.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-4B/1762652579.484812",
- "retrieved_timestamp": "1762652579.484813",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AuraIndustries/Aura-4B",
- "developer": "AuraIndustries",
- "inference_platform": "unknown",
- "id": "AuraIndustries/Aura-4B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38156203318306536
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4490409465001946
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04229607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39384375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27061170212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 4.513
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-8B/39e029ad-b385-4b26-9a02-b40c90cd8ad8.json b/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-8B/39e029ad-b385-4b26-9a02-b40c90cd8ad8.json
deleted file mode 100644
index c67454836729d8da7b820fdbd6a55c5b38af90b9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-8B/39e029ad-b385-4b26-9a02-b40c90cd8ad8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-8B/1762652579.485057",
- "retrieved_timestamp": "1762652579.485057",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AuraIndustries/Aura-8B",
- "developer": "AuraIndustries",
- "inference_platform": "unknown",
- "id": "AuraIndustries/Aura-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7205315230255722
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5131231419849063
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15181268882175228
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2860738255033557
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4004479166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38738364361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-MoE-2x4B-v2/3402882b-af4e-4509-9d57-32efa5d8c495.json b/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-MoE-2x4B-v2/3402882b-af4e-4509-9d57-32efa5d8c495.json
deleted file mode 100644
index 5693b31935b1c0c6bc276120d3b4fdbeeb97ebe9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-MoE-2x4B-v2/3402882b-af4e-4509-9d57-32efa5d8c495.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-MoE-2x4B-v2/1762652579.4855082",
- "retrieved_timestamp": "1762652579.4855092",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AuraIndustries/Aura-MoE-2x4B-v2",
- "developer": "AuraIndustries",
- "inference_platform": "unknown",
- "id": "AuraIndustries/Aura-MoE-2x4B-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4777822843388875
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43152444292813597
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03172205438066465
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4100625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609707446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 7.231
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-MoE-2x4B/8239ffac-3fca-4eab-86d4-78bab22dc420.json b/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-MoE-2x4B/8239ffac-3fca-4eab-86d4-78bab22dc420.json
deleted file mode 100644
index bb5cb66aa9d2fd2674e8ee6cd853cd4616c52402..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/AuraIndustries/AuraIndustries_Aura-MoE-2x4B/8239ffac-3fca-4eab-86d4-78bab22dc420.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AuraIndustries_Aura-MoE-2x4B/1762652579.48526",
- "retrieved_timestamp": "1762652579.485261",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AuraIndustries/Aura-MoE-2x4B",
- "developer": "AuraIndustries",
- "inference_platform": "unknown",
- "id": "AuraIndustries/Aura-MoE-2x4B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.460096987105325
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43385067041774666
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.030966767371601207
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40851041666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26496010638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 7.231
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Aurel9/Aurel9_testmerge-7b/eb45737a-74bc-482d-9d7f-d2bd1d876c77.json b/leaderboard_data/HFOpenLLMv2/Aurel9/Aurel9_testmerge-7b/eb45737a-74bc-482d-9d7f-d2bd1d876c77.json
deleted file mode 100644
index dbaa6ce3eb875aec16f10405decad7ee230b8e34..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Aurel9/Aurel9_testmerge-7b/eb45737a-74bc-482d-9d7f-d2bd1d876c77.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Aurel9_testmerge-7b/1762652579.485724",
- "retrieved_timestamp": "1762652579.485725",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Aurel9/testmerge-7b",
- "developer": "Aurel9",
- "inference_platform": "unknown",
- "id": "Aurel9/testmerge-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3979984219648311
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5189590919105128
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06570996978851963
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4658645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3052692819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Ayush-Singh/Ayush-Singh_Llama1B-sft-2/678cad7f-854b-4dc3-91cc-2d1774ef7faf.json b/leaderboard_data/HFOpenLLMv2/Ayush-Singh/Ayush-Singh_Llama1B-sft-2/678cad7f-854b-4dc3-91cc-2d1774ef7faf.json
deleted file mode 100644
index ec3fca4c46139cbfc90e37959a3aae3b639329e5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Ayush-Singh/Ayush-Singh_Llama1B-sft-2/678cad7f-854b-4dc3-91cc-2d1774ef7faf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Ayush-Singh_Llama1B-sft-2/1762652579.4859679",
- "retrieved_timestamp": "1762652579.4859688",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Ayush-Singh/Llama1B-sft-2",
- "developer": "Ayush-Singh",
- "inference_platform": "unknown",
- "id": "Ayush-Singh/Llama1B-sft-2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13743755457741016
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.283428204214368
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24580536912751677
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35520833333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11170212765957446
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.236
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_Blossom-V6-14B/24ce59a5-c351-4ed8-8944-8ec5db739da8.json b/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_Blossom-V6-14B/24ce59a5-c351-4ed8-8944-8ec5db739da8.json
deleted file mode 100644
index f3857256defd9a8a0d99ddb54739d2ac88720e03..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_Blossom-V6-14B/24ce59a5-c351-4ed8-8944-8ec5db739da8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Azure99_Blossom-V6-14B/1762652579.486225",
- "retrieved_timestamp": "1762652579.4862258",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Azure99/Blossom-V6-14B",
- "developer": "Azure99",
- "inference_platform": "unknown",
- "id": "Azure99/Blossom-V6-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6395486198841297
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5068726694646123
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.525679758308157
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40352083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4543716755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_Blossom-V6-7B/35949fb3-8c01-45cf-b4db-bbe983b15ac6.json b/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_Blossom-V6-7B/35949fb3-8c01-45cf-b4db-bbe983b15ac6.json
deleted file mode 100644
index 5a6876c4a0168b7c1bf446b1fbd20add3ce16b30..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_Blossom-V6-7B/35949fb3-8c01-45cf-b4db-bbe983b15ac6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Azure99_Blossom-V6-7B/1762652579.486468",
- "retrieved_timestamp": "1762652579.486469",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Azure99/Blossom-V6-7B",
- "developer": "Azure99",
- "inference_platform": "unknown",
- "id": "Azure99/Blossom-V6-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5538194213575536
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49736683240887
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45845921450151056
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43009375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41439494680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_blossom-v5-32b/6adfe39d-f2c2-4101-8f0f-7496d55397cd.json b/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_blossom-v5-32b/6adfe39d-f2c2-4101-8f0f-7496d55397cd.json
deleted file mode 100644
index 671fce54d50857cc9ea7348b97c8c10ee9c44413..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_blossom-v5-32b/6adfe39d-f2c2-4101-8f0f-7496d55397cd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5-32b/1762652579.4866729",
- "retrieved_timestamp": "1762652579.4866738",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Azure99/blossom-v5-32b",
- "developer": "Azure99",
- "inference_platform": "unknown",
- "id": "Azure99/blossom-v5-32b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5235441960664371
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5954545257004673
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1865558912386707
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40199999999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4234541223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.512
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_blossom-v5.1-34b/d2342413-1b55-4da5-a6e5-da6274f309ad.json b/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_blossom-v5.1-34b/d2342413-1b55-4da5-a6e5-da6274f309ad.json
deleted file mode 100644
index 53abe01ec5c746a5138bdf9f725be8e8e0f8a410..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_blossom-v5.1-34b/d2342413-1b55-4da5-a6e5-da6274f309ad.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5.1-34b/1762652579.4871309",
- "retrieved_timestamp": "1762652579.4871309",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Azure99/blossom-v5.1-34b",
- "developer": "Azure99",
- "inference_platform": "unknown",
- "id": "Azure99/blossom-v5.1-34b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5696562897556262
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6109110096611161
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2590634441087613
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39279166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4557845744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_blossom-v5.1-9b/8eb55323-b0d7-4419-aec6-03de8bcd472e.json b/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_blossom-v5.1-9b/8eb55323-b0d7-4419-aec6-03de8bcd472e.json
deleted file mode 100644
index f61dcc05f1667418d7b0e1a14360aaaa60647f62..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Azure99/Azure99_blossom-v5.1-9b/8eb55323-b0d7-4419-aec6-03de8bcd472e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Azure99_blossom-v5.1-9b/1762652579.487347",
- "retrieved_timestamp": "1762652579.487348",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Azure99/blossom-v5.1-9b",
- "developer": "Azure99",
- "inference_platform": "unknown",
- "id": "Azure99/blossom-v5.1-9b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5085816744016985
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5343292377916368
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2122356495468278
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33557046979865773
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39939583333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39793882978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.829
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0613-Llama3-70B/69cea95c-c167-42f4-a233-f7739f86f6a7.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0613-Llama3-70B/69cea95c-c167-42f4-a233-f7739f86f6a7.json
deleted file mode 100644
index d8fe59d837420d312bd3941130ae14240d008df1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0613-Llama3-70B/69cea95c-c167-42f4-a233-f7739f86f6a7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0613-Llama3-70B/1762652579.487831",
- "retrieved_timestamp": "1762652579.487832",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BAAI/Infinity-Instruct-3M-0613-Llama3-70B",
- "developer": "BAAI",
- "inference_platform": "unknown",
- "id": "BAAI/Infinity-Instruct-3M-0613-Llama3-70B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6821134589555713
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6641614484348598
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21525679758308158
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35822147651006714
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45226041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47298869680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0613-Mistral-7B/9d9ac91a-f339-41a4-ae91-3dba41b06382.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0613-Mistral-7B/9d9ac91a-f339-41a4-ae91-3dba41b06382.json
deleted file mode 100644
index 426cb5598ee391224c6ee54c236010c153e8c78f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0613-Mistral-7B/9d9ac91a-f339-41a4-ae91-3dba41b06382.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0613-Mistral-7B/1762652579.48831",
- "retrieved_timestamp": "1762652579.4883142",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BAAI/Infinity-Instruct-3M-0613-Mistral-7B",
- "developer": "BAAI",
- "inference_platform": "unknown",
- "id": "BAAI/Infinity-Instruct-3M-0613-Mistral-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5319873491225504
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49582333763258896
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08157099697885196
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4350833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31607380319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Llama3-70B/73eb53bc-a090-4415-8fdc-a767a2e00188.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Llama3-70B/73eb53bc-a090-4415-8fdc-a767a2e00188.json
deleted file mode 100644
index b05cb69e6ff6351a511555b421c8f73ed0c27336..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Llama3-70B/73eb53bc-a090-4415-8fdc-a767a2e00188.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Llama3-70B/1762652579.4887528",
- "retrieved_timestamp": "1762652579.488755",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BAAI/Infinity-Instruct-3M-0625-Llama3-70B",
- "developer": "BAAI",
- "inference_platform": "unknown",
- "id": "BAAI/Infinity-Instruct-3M-0625-Llama3-70B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7442120240960651
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6670337872930245
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22507552870090636
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3573825503355705
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46165625000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4586103723404255
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Llama3-8B/00d87824-732a-4746-8d9f-ce7b1f45c0ae.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Llama3-8B/00d87824-732a-4746-8d9f-ce7b1f45c0ae.json
deleted file mode 100644
index 4afbfb41565c82c49c3cd4b15cda08567ddc5e51..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Llama3-8B/00d87824-732a-4746-8d9f-ce7b1f45c0ae.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Llama3-8B/1762652579.4890082",
- "retrieved_timestamp": "1762652579.489009",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BAAI/Infinity-Instruct-3M-0625-Llama3-8B",
- "developer": "BAAI",
- "inference_platform": "unknown",
- "id": "BAAI/Infinity-Instruct-3M-0625-Llama3-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6050268842227512
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4954985723563075
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08836858006042296
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37120833333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3252160904255319
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Mistral-7B/be3423f2-98f0-414a-b0c3-efd0d60d4cb3.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Mistral-7B/be3423f2-98f0-414a-b0c3-efd0d60d4cb3.json
deleted file mode 100644
index 3d04b7f6606d686deb04773cf7799aa2630292f0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Mistral-7B/be3423f2-98f0-414a-b0c3-efd0d60d4cb3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Mistral-7B/1762652579.489246",
- "retrieved_timestamp": "1762652579.489247",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BAAI/Infinity-Instruct-3M-0625-Mistral-7B",
- "developer": "BAAI",
- "inference_platform": "unknown",
- "id": "BAAI/Infinity-Instruct-3M-0625-Mistral-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5867420666054957
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4939670574681802
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07628398791540786
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42723958333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3229720744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Qwen2-7B/2390d668-3273-4f58-a0fd-b13b9d9b1651.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Qwen2-7B/2390d668-3273-4f58-a0fd-b13b9d9b1651.json
deleted file mode 100644
index 367ecd17700b8d8929ef8b1df37faa0709b6711b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Qwen2-7B/2390d668-3273-4f58-a0fd-b13b9d9b1651.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Qwen2-7B/1762652579.489471",
- "retrieved_timestamp": "1762652579.489472",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BAAI/Infinity-Instruct-3M-0625-Qwen2-7B",
- "developer": "BAAI",
- "inference_platform": "unknown",
- "id": "BAAI/Infinity-Instruct-3M-0625-Qwen2-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5553930238434022
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5345911997776569
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19259818731117825
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38876041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39602726063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Yi-1.5-9B/8a2d5e9c-7d41-4638-8b8c-58d08fc0912b.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Yi-1.5-9B/8a2d5e9c-7d41-4638-8b8c-58d08fc0912b.json
deleted file mode 100644
index a42525cdae5d295def4835fbfd3a14aa496edce3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-3M-0625-Yi-1.5-9B/8a2d5e9c-7d41-4638-8b8c-58d08fc0912b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-3M-0625-Yi-1.5-9B/1762652579.489686",
- "retrieved_timestamp": "1762652579.489687",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B",
- "developer": "BAAI",
- "inference_platform": "unknown",
- "id": "BAAI/Infinity-Instruct-3M-0625-Yi-1.5-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5185984299436606
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5509115146247398
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16389728096676737
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3540268456375839
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45753125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41181848404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.829
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-0729-Llama3_1-8B/eace7f56-b853-436d-a744-bfdb9e227993.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-0729-Llama3_1-8B/eace7f56-b853-436d-a744-bfdb9e227993.json
deleted file mode 100644
index 6ec710e814b5a73faf217fe9f3600cbfd0d4d75a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-0729-Llama3_1-8B/eace7f56-b853-436d-a744-bfdb9e227993.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-0729-Llama3_1-8B/1762652579.489912",
- "retrieved_timestamp": "1762652579.489913",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B",
- "developer": "BAAI",
- "inference_platform": "unknown",
- "id": "BAAI/Infinity-Instruct-7M-0729-Llama3_1-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6131952109292234
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5077335431381055
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12764350453172205
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35784375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3223902925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-0729-mistral-7B/25477dff-04c5-4cb8-9ad9-3a13448a2a7d.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-0729-mistral-7B/25477dff-04c5-4cb8-9ad9-3a13448a2a7d.json
deleted file mode 100644
index 1ee53694737ca916d7a56b69979169579d3ef4fd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-0729-mistral-7B/25477dff-04c5-4cb8-9ad9-3a13448a2a7d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-0729-mistral-7B/1762652579.490131",
- "retrieved_timestamp": "1762652579.490131",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BAAI/Infinity-Instruct-7M-0729-mistral-7B",
- "developer": "BAAI",
- "inference_platform": "unknown",
- "id": "BAAI/Infinity-Instruct-7M-0729-mistral-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6161928128476886
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4963813586525743
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4061875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3273769946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-70B/b04b4e4d-2f15-446b-974f-21f72fd80fe0.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-70B/b04b4e4d-2f15-446b-974f-21f72fd80fe0.json
deleted file mode 100644
index e41be63563b1637e760bd6bb8da88fa1eace3846..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-70B/b04b4e4d-2f15-446b-974f-21f72fd80fe0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-70B/1762652579.490346",
- "retrieved_timestamp": "1762652579.490347",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B",
- "developer": "BAAI",
- "inference_platform": "unknown",
- "id": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-70B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7335458804859993
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6695200461367471
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25226586102719034
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37583892617449666
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45390625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.460688164893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-8B/84f2027c-3e68-489e-902b-2fec6ec8f850.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-8B/84f2027c-3e68-489e-902b-2fec6ec8f850.json
deleted file mode 100644
index 91365b830aa4b471b9334d7fe35522b4bc35696e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-8B/84f2027c-3e68-489e-902b-2fec6ec8f850.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-Gen-Llama3_1-8B/1762652579.4905548",
- "retrieved_timestamp": "1762652579.490556",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B",
- "developer": "BAAI",
- "inference_platform": "unknown",
- "id": "BAAI/Infinity-Instruct-7M-Gen-Llama3_1-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6131952109292234
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5077335431381055
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12764350453172205
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35784375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3223902925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-Gen-mistral-7B/51daf5e7-1d4e-4753-b24b-79273e6f9370.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-Gen-mistral-7B/51daf5e7-1d4e-4753-b24b-79273e6f9370.json
deleted file mode 100644
index 6346de938342970c97452c09775d23b3b5d03f7b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_Infinity-Instruct-7M-Gen-mistral-7B/51daf5e7-1d4e-4753-b24b-79273e6f9370.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BAAI_Infinity-Instruct-7M-Gen-mistral-7B/1762652579.490771",
- "retrieved_timestamp": "1762652579.490772",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BAAI/Infinity-Instruct-7M-Gen-mistral-7B",
- "developer": "BAAI",
- "inference_platform": "unknown",
- "id": "BAAI/Infinity-Instruct-7M-Gen-mistral-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6146690780462506
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4963813586525743
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4061875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3273769946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_OPI-Llama-3.1-8B-Instruct/567f27f3-3f64-4054-aa67-684c29e4d71a.json b/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_OPI-Llama-3.1-8B-Instruct/567f27f3-3f64-4054-aa67-684c29e4d71a.json
deleted file mode 100644
index d92e1d660df05931e6c610d2e3d476faaf5bf446..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BAAI/BAAI_OPI-Llama-3.1-8B-Instruct/567f27f3-3f64-4054-aa67-684c29e4d71a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BAAI_OPI-Llama-3.1-8B-Instruct/1762652579.490996",
- "retrieved_timestamp": "1762652579.490996",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BAAI/OPI-Llama-3.1-8B-Instruct",
- "developer": "BAAI",
- "inference_platform": "unknown",
- "id": "BAAI/OPI-Llama-3.1-8B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20745510800232272
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3551224419497605
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.013595166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3233020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21243351063829788
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/112be4bf-bfac-470f-bde8-c1e4d7282667.json b/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/112be4bf-bfac-470f-bde8-c1e4d7282667.json
deleted file mode 100644
index 71e72e2d278f215dc737929bf399b6ed4b45dcc9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/112be4bf-bfac-470f-bde8-c1e4d7282667.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024/1762652579.492853",
- "retrieved_timestamp": "1762652579.492853",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024",
- "developer": "BEE-spoke-data",
- "inference_platform": "unknown",
- "id": "BEE-spoke-data/tFINE-900m-e16-d32-flan-infinity-instruct-7m-T2T_en-1024"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13206735905176042
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3137786304497592
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.010574018126888218
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25419463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43927083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12367021276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "T5ForConditionalGeneration",
- "params_billions": 0.887
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-e16-d32-flan/cdf0ce69-4697-4f16-a769-80691cc08b27.json b/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-e16-d32-flan/cdf0ce69-4697-4f16-a769-80691cc08b27.json
deleted file mode 100644
index ad3cc2bd9d4af96fc7f135e02de5db00c1e8c7fe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-e16-d32-flan/cdf0ce69-4697-4f16-a769-80691cc08b27.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-e16-d32-flan/1762652579.492592",
- "retrieved_timestamp": "1762652579.492592",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BEE-spoke-data/tFINE-900m-e16-d32-flan",
- "developer": "BEE-spoke-data",
- "inference_platform": "unknown",
- "id": "BEE-spoke-data/tFINE-900m-e16-d32-flan"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15057713533424646
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30280434847620613
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.009818731117824773
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2332214765100671
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3724166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1307347074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "T5ForConditionalGeneration",
- "params_billions": 0.887
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-e16-d32-instruct_2e/7b1574ca-4106-42c0-9336-27df4f0851aa.json b/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-e16-d32-instruct_2e/7b1574ca-4106-42c0-9336-27df4f0851aa.json
deleted file mode 100644
index 374b0997c07f72f30e076ed107642c524b2cc9df..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-e16-d32-instruct_2e/7b1574ca-4106-42c0-9336-27df4f0851aa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-e16-d32-instruct_2e/1762652579.493063",
- "retrieved_timestamp": "1762652579.493064",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e",
- "developer": "BEE-spoke-data",
- "inference_platform": "unknown",
- "id": "BEE-spoke-data/tFINE-900m-e16-d32-instruct_2e"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1402855534426433
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31345674638809023
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.013595166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42069791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12367021276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "T5ForConditionalGeneration",
- "params_billions": 0.887
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-instruct-orpo/e91b6749-3103-4cfa-bf16-86126ee2086e.json b/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-instruct-orpo/e91b6749-3103-4cfa-bf16-86126ee2086e.json
deleted file mode 100644
index 48cdf2542590a3cf792084fa18485e5af78e3fdd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BEE-spoke-data/BEE-spoke-data_tFINE-900m-instruct-orpo/e91b6749-3103-4cfa-bf16-86126ee2086e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BEE-spoke-data_tFINE-900m-instruct-orpo/1762652579.493278",
- "retrieved_timestamp": "1762652579.493279",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BEE-spoke-data/tFINE-900m-instruct-orpo",
- "developer": "BEE-spoke-data",
- "inference_platform": "unknown",
- "id": "BEE-spoke-data/tFINE-900m-instruct-orpo"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13299157346950535
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30220933767045094
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015861027190332326
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3408541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11519281914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "T5ForConditionalGeneration",
- "params_billions": 0.887
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BSC-LT/BSC-LT_salamandra-7b-instruct/2eb60f3a-53f4-478a-8292-aa5e210a8cdf.json b/leaderboard_data/HFOpenLLMv2/BSC-LT/BSC-LT_salamandra-7b-instruct/2eb60f3a-53f4-478a-8292-aa5e210a8cdf.json
deleted file mode 100644
index ebf36d902c51602dd63692564f30a0232ea687e6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BSC-LT/BSC-LT_salamandra-7b-instruct/2eb60f3a-53f4-478a-8292-aa5e210a8cdf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BSC-LT_salamandra-7b-instruct/1762652579.493781",
- "retrieved_timestamp": "1762652579.493781",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BSC-LT/salamandra-7b-instruct",
- "developer": "BSC-LT",
- "inference_platform": "unknown",
- "id": "BSC-LT/salamandra-7b-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24507418095098782
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3851324290080956
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.008308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41343749999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18051861702127658
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 7.768
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BSC-LT/BSC-LT_salamandra-7b/36d2d3af-60aa-4624-b414-e249d06b6ee1.json b/leaderboard_data/HFOpenLLMv2/BSC-LT/BSC-LT_salamandra-7b/36d2d3af-60aa-4624-b414-e249d06b6ee1.json
deleted file mode 100644
index 2877eafae85bdbf463f001300fe3696a480a76ac..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BSC-LT/BSC-LT_salamandra-7b/36d2d3af-60aa-4624-b414-e249d06b6ee1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BSC-LT_salamandra-7b/1762652579.493503",
- "retrieved_timestamp": "1762652579.493503",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BSC-LT/salamandra-7b",
- "developer": "BSC-LT",
- "inference_platform": "unknown",
- "id": "BSC-LT/salamandra-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13673829882489574
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3516612209885983
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0037764350453172208
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35009375000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14926861702127658
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 7.768
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Baptiste-HUVELLE-10/Baptiste-HUVELLE-10_LeTriomphant2.2_ECE_iLAB/b1632b15-fa00-4476-b3f4-05aba95df664.json b/leaderboard_data/HFOpenLLMv2/Baptiste-HUVELLE-10/Baptiste-HUVELLE-10_LeTriomphant2.2_ECE_iLAB/b1632b15-fa00-4476-b3f4-05aba95df664.json
deleted file mode 100644
index a65778130a1a38105ac4d0f50d1ea6f57b8eea19..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Baptiste-HUVELLE-10/Baptiste-HUVELLE-10_LeTriomphant2.2_ECE_iLAB/b1632b15-fa00-4476-b3f4-05aba95df664.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Baptiste-HUVELLE-10_LeTriomphant2.2_ECE_iLAB/1762652579.4943",
- "retrieved_timestamp": "1762652579.4943008",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB",
- "developer": "Baptiste-HUVELLE-10",
- "inference_platform": "unknown",
- "id": "Baptiste-HUVELLE-10/LeTriomphant2.2_ECE_iLAB"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5076330802271307
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7256319952414622
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44486404833836857
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39932885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46255208333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5851063829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BenevolenceMessiah/BenevolenceMessiah_Qwen2.5-72B-2x-Instruct-TIES-v1.0/ad8e3029-612c-434e-a92b-f5c481476e25.json b/leaderboard_data/HFOpenLLMv2/BenevolenceMessiah/BenevolenceMessiah_Qwen2.5-72B-2x-Instruct-TIES-v1.0/ad8e3029-612c-434e-a92b-f5c481476e25.json
deleted file mode 100644
index 369f9325e987d6b6bd8717e942cbbe66654cf1a0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BenevolenceMessiah/BenevolenceMessiah_Qwen2.5-72B-2x-Instruct-TIES-v1.0/ad8e3029-612c-434e-a92b-f5c481476e25.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BenevolenceMessiah_Qwen2.5-72B-2x-Instruct-TIES-v1.0/1762652579.4945831",
- "retrieved_timestamp": "1762652579.494584",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0",
- "developer": "BenevolenceMessiah",
- "inference_platform": "unknown",
- "id": "BenevolenceMessiah/Qwen2.5-72B-2x-Instruct-TIES-v1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5473499204333391
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.727311411382245
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5785498489425982
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3674496644295302
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4206666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5628324468085106
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.7
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BenevolenceMessiah/BenevolenceMessiah_Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/129ba653-ec88-46f2-8828-77e320b922c6.json b/leaderboard_data/HFOpenLLMv2/BenevolenceMessiah/BenevolenceMessiah_Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/129ba653-ec88-46f2-8828-77e320b922c6.json
deleted file mode 100644
index ae809373b904b32af0981ac435dee2eff3ebae23..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BenevolenceMessiah/BenevolenceMessiah_Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/129ba653-ec88-46f2-8828-77e320b922c6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BenevolenceMessiah_Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0/1762652579.4948769",
- "retrieved_timestamp": "1762652579.494878",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0",
- "developer": "BenevolenceMessiah",
- "inference_platform": "unknown",
- "id": "BenevolenceMessiah/Yi-Coder-9B-Chat-Instruct-TIES-MoE-v1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011531624977283
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4908666248538678
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04154078549848943
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4079791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26803523936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 28.309
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BlackBeenie/BlackBeenie_Bloslain-8B-v0.2/160fb625-9c1c-40c1-ab93-7d9f7a2220d2.json b/leaderboard_data/HFOpenLLMv2/BlackBeenie/BlackBeenie_Bloslain-8B-v0.2/160fb625-9c1c-40c1-ab93-7d9f7a2220d2.json
deleted file mode 100644
index a7bc1d1e9b54c970754b8ebb76cccc76e4781337..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BlackBeenie/BlackBeenie_Bloslain-8B-v0.2/160fb625-9c1c-40c1-ab93-7d9f7a2220d2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BlackBeenie_Bloslain-8B-v0.2/1762652579.495104",
- "retrieved_timestamp": "1762652579.495104",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BlackBeenie/Bloslain-8B-v0.2",
- "developer": "BlackBeenie",
- "inference_platform": "unknown",
- "id": "BlackBeenie/Bloslain-8B-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5023371321427147
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.511087946253543
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14501510574018128
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4075729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3653590425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BlackBeenie/BlackBeenie_Llama-3.1-8B-OpenO1-SFT-v0.1/b298e0fc-f4fb-4464-beb8-45f8b5f35653.json b/leaderboard_data/HFOpenLLMv2/BlackBeenie/BlackBeenie_Llama-3.1-8B-OpenO1-SFT-v0.1/b298e0fc-f4fb-4464-beb8-45f8b5f35653.json
deleted file mode 100644
index 785931f944772b0e25f480e73ad5dfbf7d983501..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BlackBeenie/BlackBeenie_Llama-3.1-8B-OpenO1-SFT-v0.1/b298e0fc-f4fb-4464-beb8-45f8b5f35653.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BlackBeenie_Llama-3.1-8B-OpenO1-SFT-v0.1/1762652579.495378",
- "retrieved_timestamp": "1762652579.495378",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1",
- "developer": "BlackBeenie",
- "inference_platform": "unknown",
- "id": "BlackBeenie/Llama-3.1-8B-OpenO1-SFT-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5124037553690873
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4787448361604986
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15256797583081572
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36181250000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34915226063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_DreadMix/e6b5e728-28a4-444a-8b6b-89d29b7b5225.json b/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_DreadMix/e6b5e728-28a4-444a-8b6b-89d29b7b5225.json
deleted file mode 100644
index 96989649a3ba341117481de6daa07ef603272bfa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_DreadMix/e6b5e728-28a4-444a-8b6b-89d29b7b5225.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BoltMonkey_DreadMix/1762652579.497959",
- "retrieved_timestamp": "1762652579.497961",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BoltMonkey/DreadMix",
- "developer": "BoltMonkey",
- "inference_platform": "unknown",
- "id": "BoltMonkey/DreadMix"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7094908176970438
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5435097438362475
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1555891238670695
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42121875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37898936170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/d9e3bd73-cd7e-46d4-9e62-0cfac178f62a.json b/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/d9e3bd73-cd7e-46d4-9e62-0cfac178f62a.json
deleted file mode 100644
index fcdfc5295efec531f86ae0a118d547fa9e3e869d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/d9e3bd73-cd7e-46d4-9e62-0cfac178f62a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/1762652579.498452",
- "retrieved_timestamp": "1762652579.498454",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated",
- "developer": "BoltMonkey",
- "inference_platform": "unknown",
- "id": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7998909559967553
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5151987922850448
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11933534743202417
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.401875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37333776595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/f83a5d67-b967-47c8-b76e-b58c445a3634.json b/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/f83a5d67-b967-47c8-b76e-b58c445a3634.json
deleted file mode 100644
index 5f5c2c0a083544ab4a69340561fef068be0e1a60..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/f83a5d67-b967-47c8-b76e-b58c445a3634.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BoltMonkey_NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated/1762652579.498964",
- "retrieved_timestamp": "1762652579.498965",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated",
- "developer": "BoltMonkey",
- "inference_platform": "unknown",
- "id": "BoltMonkey/NeuralDaredevil-SuperNova-Lite-7B-DARETIES-abliterated"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45902316963434797
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5185441912447182
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09365558912386707
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4082604166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3631150265957447
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_SuperNeuralDreadDevil-8b/2ad0eebb-31e3-4f28-aba6-073f33d5cbed.json b/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_SuperNeuralDreadDevil-8b/2ad0eebb-31e3-4f28-aba6-073f33d5cbed.json
deleted file mode 100644
index f3ce3a86e8987bffc0f59c2ea28a3a72c9009d6d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BoltMonkey/BoltMonkey_SuperNeuralDreadDevil-8b/2ad0eebb-31e3-4f28-aba6-073f33d5cbed.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BoltMonkey_SuperNeuralDreadDevil-8b/1762652579.499188",
- "retrieved_timestamp": "1762652579.499189",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BoltMonkey/SuperNeuralDreadDevil-8b",
- "developer": "BoltMonkey",
- "inference_platform": "unknown",
- "id": "BoltMonkey/SuperNeuralDreadDevil-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7709898624538447
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5286196012035721
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09290030211480363
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39768749999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36785239361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_GEITje-7B-ultra/efcc28d3-ca6a-4100-afd2-75f9925354ba.json b/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_GEITje-7B-ultra/efcc28d3-ca6a-4100-afd2-75f9925354ba.json
deleted file mode 100644
index 32afe6b2208ee3733358a8a35ba11104e6cb0afb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_GEITje-7B-ultra/efcc28d3-ca6a-4100-afd2-75f9925354ba.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BramVanroy_GEITje-7B-ultra/1762652579.499682",
- "retrieved_timestamp": "1762652579.4996831",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BramVanroy/GEITje-7B-ultra",
- "developer": "BramVanroy",
- "inference_platform": "unknown",
- "id": "BramVanroy/GEITje-7B-ultra"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3723442687624392
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37761612997305494
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015861027190332326
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32897916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20113031914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_fietje-2-chat/faf20d1a-5a92-49b2-be69-903cafb9460a.json b/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_fietje-2-chat/faf20d1a-5a92-49b2-be69-903cafb9460a.json
deleted file mode 100644
index 755acdf50262b95831ab458e06186dc169bb1c05..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_fietje-2-chat/faf20d1a-5a92-49b2-be69-903cafb9460a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BramVanroy_fietje-2-chat/1762652579.500146",
- "retrieved_timestamp": "1762652579.5001469",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BramVanroy/fietje-2-chat",
- "developer": "BramVanroy",
- "inference_platform": "unknown",
- "id": "BramVanroy/fietje-2-chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2917359273394593
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4149753717401999
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0188821752265861
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23993288590604026
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3527604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20545212765957446
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "PhiForCausalLM",
- "params_billions": 2.775
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_fietje-2-instruct/03e122da-30cc-4c2e-9b44-8261c3f2a934.json b/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_fietje-2-instruct/03e122da-30cc-4c2e-9b44-8261c3f2a934.json
deleted file mode 100644
index e42d671b504719a3a0b26adcb82de584060d900e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_fietje-2-instruct/03e122da-30cc-4c2e-9b44-8261c3f2a934.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BramVanroy_fietje-2-instruct/1762652579.500353",
- "retrieved_timestamp": "1762652579.500354",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BramVanroy/fietje-2-instruct",
- "developer": "BramVanroy",
- "inference_platform": "unknown",
- "id": "BramVanroy/fietje-2-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2789963962286732
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41360714173029806
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.022658610271903322
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2332214765100671
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3369166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2103557180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "PhiForCausalLM",
- "params_billions": 2.775
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_fietje-2/3712e2c3-0ed1-4dc9-95fc-4be0bec18675.json b/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_fietje-2/3712e2c3-0ed1-4dc9-95fc-4be0bec18675.json
deleted file mode 100644
index 5fbf7cf1ae27018d9aaf2c533b858089e69c2179..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/BramVanroy/BramVanroy_fietje-2/3712e2c3-0ed1-4dc9-95fc-4be0bec18675.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BramVanroy_fietje-2/1762652579.499938",
- "retrieved_timestamp": "1762652579.499939",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BramVanroy/fietje-2",
- "developer": "BramVanroy",
- "inference_platform": "unknown",
- "id": "BramVanroy/fietje-2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20980332185268422
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40356695178386187
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015861027190332326
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25419463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3695625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19855385638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "PhiForCausalLM",
- "params_billions": 2.78
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_Llama-PLLuM-8B-chat/cb833a8b-81d7-41a6-bff2-9d0927703113.json b/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_Llama-PLLuM-8B-chat/cb833a8b-81d7-41a6-bff2-9d0927703113.json
deleted file mode 100644
index eac0c8b8c30ec742a139c1914c0d59f51f325f98..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_Llama-PLLuM-8B-chat/cb833a8b-81d7-41a6-bff2-9d0927703113.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_Llama-PLLuM-8B-chat/1762652579.5008068",
- "retrieved_timestamp": "1762652579.500808",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CYFRAGOVPL/Llama-PLLuM-8B-chat",
- "developer": "CYFRAGOVPL",
- "inference_platform": "unknown",
- "id": "CYFRAGOVPL/Llama-PLLuM-8B-chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3514862786295917
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40770722535589576
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.033987915407854986
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41991666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27194148936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-base/76833817-781e-4292-9fe8-5e8a1da7f962.json b/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-base/76833817-781e-4292-9fe8-5e8a1da7f962.json
deleted file mode 100644
index 57b80877cd62e210fa2b695af1aebc39f2350ad1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-base/76833817-781e-4292-9fe8-5e8a1da7f962.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-base/1762652579.501051",
- "retrieved_timestamp": "1762652579.501052",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CYFRAGOVPL/PLLuM-12B-base",
- "developer": "CYFRAGOVPL",
- "inference_platform": "unknown",
- "id": "CYFRAGOVPL/PLLuM-12B-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2820937335159599
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4390596143784447
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.028700906344410877
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4142395833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2740192819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-chat/6e325f0f-b5db-4773-8179-7e949bd3f5f2.json b/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-chat/6e325f0f-b5db-4773-8179-7e949bd3f5f2.json
deleted file mode 100644
index f5de95952df14ad4940ab82458351efcb9e3577a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-chat/6e325f0f-b5db-4773-8179-7e949bd3f5f2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-chat/1762652579.501271",
- "retrieved_timestamp": "1762652579.501272",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CYFRAGOVPL/PLLuM-12B-chat",
- "developer": "CYFRAGOVPL",
- "inference_platform": "unknown",
- "id": "CYFRAGOVPL/PLLuM-12B-chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32143601200370575
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44458000333075703
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01812688821752266
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4114791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2872340425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-nc-base/e9b90a3b-09c6-4d3b-9aa3-6279ea3cccb5.json b/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-nc-base/e9b90a3b-09c6-4d3b-9aa3-6279ea3cccb5.json
deleted file mode 100644
index 9590ea3f8b41c178e0728fbff9b2eff6c130fbc1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-nc-base/e9b90a3b-09c6-4d3b-9aa3-6279ea3cccb5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-nc-base/1762652579.501493",
- "retrieved_timestamp": "1762652579.501494",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CYFRAGOVPL/PLLuM-12B-nc-base",
- "developer": "CYFRAGOVPL",
- "inference_platform": "unknown",
- "id": "CYFRAGOVPL/PLLuM-12B-nc-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24045310886226323
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42767589675970014
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02190332326283988
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36451041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25590093085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-nc-chat/fd19dada-5945-45d5-8a84-122404b8dd57.json b/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-nc-chat/fd19dada-5945-45d5-8a84-122404b8dd57.json
deleted file mode 100644
index 5b55bc1d9d414bed5464c75c042f392d4265f807..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CYFRAGOVPL/CYFRAGOVPL_PLLuM-12B-nc-chat/fd19dada-5945-45d5-8a84-122404b8dd57.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CYFRAGOVPL_PLLuM-12B-nc-chat/1762652579.501705",
- "retrieved_timestamp": "1762652579.501706",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CYFRAGOVPL/PLLuM-12B-nc-chat",
- "developer": "CYFRAGOVPL",
- "inference_platform": "unknown",
- "id": "CYFRAGOVPL/PLLuM-12B-nc-chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28344237733657807
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45764328318815456
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.012084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4353541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25972406914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CarrotAI/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct-2412/41809335-e00c-4911-bc08-6edd71891585.json b/leaderboard_data/HFOpenLLMv2/CarrotAI/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct-2412/41809335-e00c-4911-bc08-6edd71891585.json
deleted file mode 100644
index 223ac119e89ddcfbd063ef5ab6bafedd9542c87f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CarrotAI/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct-2412/41809335-e00c-4911-bc08-6edd71891585.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct-2412/1762652579.5021691",
- "retrieved_timestamp": "1762652579.50217",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412",
- "developer": "CarrotAI",
- "inference_platform": "unknown",
- "id": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct-2412"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47818233398493776
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43577246498246686
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17598187311178248
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3872083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31341422872340424
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CarrotAI/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct/8c56b973-d5cb-48b6-a43e-ad50769b1f40.json b/leaderboard_data/HFOpenLLMv2/CarrotAI/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct/8c56b973-d5cb-48b6-a43e-ad50769b1f40.json
deleted file mode 100644
index 08745a2a060c3f5c46a2f4ef559af65072b96595..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CarrotAI/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct/8c56b973-d5cb-48b6-a43e-ad50769b1f40.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CarrotAI_Llama-3.2-Rabbit-Ko-3B-Instruct/1762652579.501917",
- "retrieved_timestamp": "1762652579.5019178",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct",
- "developer": "CarrotAI",
- "inference_platform": "unknown",
- "id": "CarrotAI/Llama-3.2-Rabbit-Ko-3B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7198821349574684
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4426719080820793
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2054380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3649166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2822473404255319
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Casual-Autopsy/Casual-Autopsy_L3-Umbral-Mind-RP-v2.0-8B/da5c1edf-bd74-48a3-ad76-a4bd89539b7f.json b/leaderboard_data/HFOpenLLMv2/Casual-Autopsy/Casual-Autopsy_L3-Umbral-Mind-RP-v2.0-8B/da5c1edf-bd74-48a3-ad76-a4bd89539b7f.json
deleted file mode 100644
index c65d28fddba9b5ccb86bcce61529587494c5384f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Casual-Autopsy/Casual-Autopsy_L3-Umbral-Mind-RP-v2.0-8B/da5c1edf-bd74-48a3-ad76-a4bd89539b7f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Casual-Autopsy_L3-Umbral-Mind-RP-v2.0-8B/1762652579.502389",
- "retrieved_timestamp": "1762652579.502389",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B",
- "developer": "Casual-Autopsy",
- "inference_platform": "unknown",
- "id": "Casual-Autopsy/L3-Umbral-Mind-RP-v2.0-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7122634609502786
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5262406145493724
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1095166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3686666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3723404255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CausalLM/CausalLM_14B/c4376867-854d-44fa-9215-b9c1af7612a4.json b/leaderboard_data/HFOpenLLMv2/CausalLM/CausalLM_14B/c4376867-854d-44fa-9215-b9c1af7612a4.json
deleted file mode 100644
index 9edbf818f82b3f2e07323d4921b60c732ff67b38..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CausalLM/CausalLM_14B/c4376867-854d-44fa-9215-b9c1af7612a4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CausalLM_14B/1762652579.502646",
- "retrieved_timestamp": "1762652579.502647",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CausalLM/14B",
- "developer": "CausalLM",
- "inference_platform": "unknown",
- "id": "CausalLM/14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2788213052478535
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4700462397700626
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0755287009063444
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4154791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3221409574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CausalLM/CausalLM_34b-beta/cc482ca4-031a-4c22-90c2-68322184125b.json b/leaderboard_data/HFOpenLLMv2/CausalLM/CausalLM_34b-beta/cc482ca4-031a-4c22-90c2-68322184125b.json
deleted file mode 100644
index d67cbc79017b1f0d1c1c77dfccd76774d7b485e1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CausalLM/CausalLM_34b-beta/cc482ca4-031a-4c22-90c2-68322184125b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CausalLM_34b-beta/1762652579.502916",
- "retrieved_timestamp": "1762652579.502916",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CausalLM/34b-beta",
- "developer": "CausalLM",
- "inference_platform": "unknown",
- "id": "CausalLM/34b-beta"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3043247472262486
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5590996102136266
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04833836858006042
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3464765100671141
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37486458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5324966755319149
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CausalLM/CausalLM_preview-1-hf/e9fcf09c-14e2-4226-b1e5-b5752ac1a753.json b/leaderboard_data/HFOpenLLMv2/CausalLM/CausalLM_preview-1-hf/e9fcf09c-14e2-4226-b1e5-b5752ac1a753.json
deleted file mode 100644
index 744daa3ad804cc501505d84c2f08fc90dbd8fedb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CausalLM/CausalLM_preview-1-hf/e9fcf09c-14e2-4226-b1e5-b5752ac1a753.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CausalLM_preview-1-hf/1762652579.503128",
- "retrieved_timestamp": "1762652579.503129",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CausalLM/preview-1-hf",
- "developer": "CausalLM",
- "inference_platform": "unknown",
- "id": "CausalLM/preview-1-hf"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5558928088582737
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3614567463880903
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.030211480362537766
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34218750000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35970744680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GlmForCausalLM",
- "params_billions": 9.543
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Changgil/Changgil_K2S3-14b-v0.2/4dfe2d3c-7fc3-4b57-8acd-02b0808ccdb1.json b/leaderboard_data/HFOpenLLMv2/Changgil/Changgil_K2S3-14b-v0.2/4dfe2d3c-7fc3-4b57-8acd-02b0808ccdb1.json
deleted file mode 100644
index 01ffe028972970c2f7769f87927e75c04046b123..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Changgil/Changgil_K2S3-14b-v0.2/4dfe2d3c-7fc3-4b57-8acd-02b0808ccdb1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Changgil_K2S3-14b-v0.2/1762652579.503338",
- "retrieved_timestamp": "1762652579.503339",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Changgil/K2S3-14b-v0.2",
- "developer": "Changgil",
- "inference_platform": "unknown",
- "id": "Changgil/K2S3-14b-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3242840108689389
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4613311786298187
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05740181268882175
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3922604166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2643783244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 14.352
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Changgil/Changgil_K2S3-v0.1/225bc36b-4bfb-4818-8601-903e7f9decb3.json b/leaderboard_data/HFOpenLLMv2/Changgil/Changgil_K2S3-v0.1/225bc36b-4bfb-4818-8601-903e7f9decb3.json
deleted file mode 100644
index 885ce58b95a10a560ede35872ae77f69d0fe3f20..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Changgil/Changgil_K2S3-v0.1/225bc36b-4bfb-4818-8601-903e7f9decb3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Changgil_K2S3-v0.1/1762652579.503593",
- "retrieved_timestamp": "1762652579.503594",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Changgil/K2S3-v0.1",
- "developer": "Changgil",
- "inference_platform": "unknown",
- "id": "Changgil/K2S3-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32765617450586665
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46554920672286154
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04607250755287009
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40140624999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2562333776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 14.352
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_Albacus/0be5437b-2489-4107-8c38-d0cd198a2d8c.json b/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_Albacus/0be5437b-2489-4107-8c38-d0cd198a2d8c.json
deleted file mode 100644
index 77561cff43410d0838e495954d0564921cb1af79..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_Albacus/0be5437b-2489-4107-8c38-d0cd198a2d8c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ClaudioItaly_Albacus/1762652579.503804",
- "retrieved_timestamp": "1762652579.503805",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ClaudioItaly/Albacus",
- "developer": "ClaudioItaly",
- "inference_platform": "unknown",
- "id": "ClaudioItaly/Albacus"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4667415790103592
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5113043406568835
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07099697885196375
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41353124999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31648936170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 8.987
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_Book-Gut12B/b2bdf337-9065-4a67-aa1a-5ba8751d5438.json b/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_Book-Gut12B/b2bdf337-9065-4a67-aa1a-5ba8751d5438.json
deleted file mode 100644
index 2c1e3253265764a52bc3b6d025a12cbc50582190..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_Book-Gut12B/b2bdf337-9065-4a67-aa1a-5ba8751d5438.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ClaudioItaly_Book-Gut12B/1762652579.504094",
- "retrieved_timestamp": "1762652579.504095",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ClaudioItaly/Book-Gut12B",
- "developer": "ClaudioItaly",
- "inference_platform": "unknown",
- "id": "ClaudioItaly/Book-Gut12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39984685080032095
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5417370194443233
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10196374622356495
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4635416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3670212765957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_Evolutionstory-7B-v2.2/e06c19ce-9247-473b-b5db-8686fee5e785.json b/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_Evolutionstory-7B-v2.2/e06c19ce-9247-473b-b5db-8686fee5e785.json
deleted file mode 100644
index 333f59d96d735bafd3fb0ed7ecfdb618a37d08e5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_Evolutionstory-7B-v2.2/e06c19ce-9247-473b-b5db-8686fee5e785.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ClaudioItaly_Evolutionstory-7B-v2.2/1762652579.504309",
- "retrieved_timestamp": "1762652579.504309",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ClaudioItaly/Evolutionstory-7B-v2.2",
- "developer": "ClaudioItaly",
- "inference_platform": "unknown",
- "id": "ClaudioItaly/Evolutionstory-7B-v2.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4813794066410457
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5108043406568835
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07099697885196375
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41353124999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31590757978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_intelligence-cod-rag-7b-v3/51559a6d-1262-41e2-8092-008dc8f53974.json b/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_intelligence-cod-rag-7b-v3/51559a6d-1262-41e2-8092-008dc8f53974.json
deleted file mode 100644
index 3e0fdf20ac4c6035de5afe3aa742658115d6b1ca..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ClaudioItaly/ClaudioItaly_intelligence-cod-rag-7b-v3/51559a6d-1262-41e2-8092-008dc8f53974.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ClaudioItaly_intelligence-cod-rag-7b-v3/1762652579.504531",
- "retrieved_timestamp": "1762652579.504531",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ClaudioItaly/intelligence-cod-rag-7b-v3",
- "developer": "ClaudioItaly",
- "inference_platform": "unknown",
- "id": "ClaudioItaly/intelligence-cod-rag-7b-v3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6897820006471718
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5366339718839108
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3806646525679758
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4152708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4195478723404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-23-35B/9c77aa3f-080c-4dd6-8a9d-50d18657de35.json b/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-23-35B/9c77aa3f-080c-4dd6-8a9d-50d18657de35.json
deleted file mode 100644
index 31739a37ec4a98173a0a053588a5a6414f3560ec..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-23-35B/9c77aa3f-080c-4dd6-8a9d-50d18657de35.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CohereForAI_aya-23-35B/1762652579.5047522",
- "retrieved_timestamp": "1762652579.5047529",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CohereForAI/aya-23-35B",
- "developer": "CohereForAI",
- "inference_platform": "unknown",
- "id": "CohereForAI/aya-23-35B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6461932117891638
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5399551450731271
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03474320241691843
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4309895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33560505319148937
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "CohereForCausalLM",
- "params_billions": 34.981
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-23-8B/2ff655cd-9123-4577-832b-3f0b04f7d466.json b/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-23-8B/2ff655cd-9123-4577-832b-3f0b04f7d466.json
deleted file mode 100644
index ae8daf4a1e83770ba740b628adb863dbbefc67b6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-23-8B/2ff655cd-9123-4577-832b-3f0b04f7d466.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CohereForAI_aya-23-8B/1762652579.5050838",
- "retrieved_timestamp": "1762652579.505085",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CohereForAI/aya-23-8B",
- "developer": "CohereForAI",
- "inference_platform": "unknown",
- "id": "CohereForAI/aya-23-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4698887839820565
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4296161519220307
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01661631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3940625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2278091755319149
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "CohereForCausalLM",
- "params_billions": 8.028
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-expanse-32b/ebbe9a61-6dff-467a-b77c-7c125a043832.json b/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-expanse-32b/ebbe9a61-6dff-467a-b77c-7c125a043832.json
deleted file mode 100644
index df351f0515c8d11e179ddb3d512495047f091e35..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-expanse-32b/ebbe9a61-6dff-467a-b77c-7c125a043832.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CohereForAI_aya-expanse-32b/1762652579.505483",
- "retrieved_timestamp": "1762652579.505484",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CohereForAI/aya-expanse-32b",
- "developer": "CohereForAI",
- "inference_platform": "unknown",
- "id": "CohereForAI/aya-expanse-32b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7301737168490716
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5648670099212114
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15332326283987915
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32550335570469796
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3872708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41298204787234044
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "CohereForCausalLM",
- "params_billions": 32.296
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-expanse-8b/3d54299c-ae39-45f4-b31c-c0667dcbe9f4.json b/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-expanse-8b/3d54299c-ae39-45f4-b31c-c0667dcbe9f4.json
deleted file mode 100644
index b407718e14b2658516611acb7a6ae77e8fe885a9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_aya-expanse-8b/3d54299c-ae39-45f4-b31c-c0667dcbe9f4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CohereForAI_aya-expanse-8b/1762652579.505729",
- "retrieved_timestamp": "1762652579.5057302",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CohereForAI/aya-expanse-8b",
- "developer": "CohereForAI",
- "inference_platform": "unknown",
- "id": "CohereForAI/aya-expanse-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6358517622131501
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4977203055736406
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08610271903323263
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37288541666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3003656914893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "CohereForCausalLM",
- "params_billions": 8.028
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r-plus-08-2024/f1ef3dda-1b62-4ec9-9c88-a8e60b8a8f6d.json b/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r-plus-08-2024/f1ef3dda-1b62-4ec9-9c88-a8e60b8a8f6d.json
deleted file mode 100644
index 542a30489f4a9a04d1113baf0028b56756cb7a5d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r-plus-08-2024/f1ef3dda-1b62-4ec9-9c88-a8e60b8a8f6d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r-plus-08-2024/1762652579.506166",
- "retrieved_timestamp": "1762652579.506167",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CohereForAI/c4ai-command-r-plus-08-2024",
- "developer": "CohereForAI",
- "inference_platform": "unknown",
- "id": "CohereForAI/c4ai-command-r-plus-08-2024"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7539539532883859
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5995999913027185
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12386706948640483
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35067114093959734
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48294791666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44207114361702127
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "CohereForCausalLM",
- "params_billions": 103.811
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r-plus/c5326cd1-8e73-4f84-8efb-49b3be5c50e7.json b/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r-plus/c5326cd1-8e73-4f84-8efb-49b3be5c50e7.json
deleted file mode 100644
index 833b34d48bb1bd419e027e9d635f0fda275ae87f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r-plus/c5326cd1-8e73-4f84-8efb-49b3be5c50e7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r-plus/1762652579.50595",
- "retrieved_timestamp": "1762652579.505951",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CohereForAI/c4ai-command-r-plus",
- "developer": "CohereForAI",
- "inference_platform": "unknown",
- "id": "CohereForAI/c4ai-command-r-plus"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7664186580495308
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.581542357407793
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08006042296072508
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48071875000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3991855053191489
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "CohereForCausalLM",
- "params_billions": 103.811
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r-v01/cd24b18c-faff-44e1-87d6-735bcb9ab465.json b/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r-v01/cd24b18c-faff-44e1-87d6-735bcb9ab465.json
deleted file mode 100644
index 3d141a9dfd4579f552758c3b5da92a8d3bb15545..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r-v01/cd24b18c-faff-44e1-87d6-735bcb9ab465.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r-v01/1762652579.506387",
- "retrieved_timestamp": "1762652579.506388",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CohereForAI/c4ai-command-r-v01",
- "developer": "CohereForAI",
- "inference_platform": "unknown",
- "id": "CohereForAI/c4ai-command-r-v01"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6748194789824333
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5406415512767856
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03474320241691843
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45169791666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3369348404255319
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "CohereForCausalLM",
- "params_billions": 34.981
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r7b-12-2024/85fa7edb-df5c-4baa-a0f1-c520db55c08c.json b/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r7b-12-2024/85fa7edb-df5c-4baa-a0f1-c520db55c08c.json
deleted file mode 100644
index 5029fa4395e21078f3244f0516f7109e08430e6c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CohereForAI/CohereForAI_c4ai-command-r7b-12-2024/85fa7edb-df5c-4baa-a0f1-c520db55c08c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CohereForAI_c4ai-command-r7b-12-2024/1762652579.5066051",
- "retrieved_timestamp": "1762652579.506606",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CohereForAI/c4ai-command-r7b-12-2024",
- "developer": "CohereForAI",
- "inference_platform": "unknown",
- "id": "CohereForAI/c4ai-command-r7b-12-2024"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7713145564878965
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5502642151855635
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2990936555891239
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41251041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3572140957446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Cohere2ForCausalLM",
- "params_billions": 8.028
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/76f198aa-0aa5-4c98-8d86-20410582d3a5.json b/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/76f198aa-0aa5-4c98-8d86-20410582d3a5.json
deleted file mode 100644
index cb479a78237311135af3d5a6e38acc5ccbab19f6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/76f198aa-0aa5-4c98-8d86-20410582d3a5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/1762652579.506829",
- "retrieved_timestamp": "1762652579.50683",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0",
- "developer": "Columbia-NLP",
- "inference_platform": "unknown",
- "id": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3278312654866864
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39199563613207467
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04305135951661632
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24916107382550334
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41201041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16655585106382978
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 2.506
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/f39ad9a4-b02a-415e-b83a-53d705b6bea2.json b/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/f39ad9a4-b02a-415e-b83a-53d705b6bea2.json
deleted file mode 100644
index b3988b3d82389607de12d9e360d59f24782ffe29..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/f39ad9a4-b02a-415e-b83a-53d705b6bea2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-dpo-v1.0/1762652579.507083",
- "retrieved_timestamp": "1762652579.507083",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0",
- "developer": "Columbia-NLP",
- "inference_platform": "unknown",
- "id": "Columbia-NLP/LION-Gemma-2b-dpo-v1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3102457036219453
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38810309159554507
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05362537764350453
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4080729166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16647273936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 2.506
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-Gemma-2b-sft-v1.0/0cb84d3d-4f5d-4afc-9c49-de567f2ffbcb.json b/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-Gemma-2b-sft-v1.0/0cb84d3d-4f5d-4afc-9c49-de567f2ffbcb.json
deleted file mode 100644
index ddfa65d1eeb033b088ca7b55399e255c67c7308a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-Gemma-2b-sft-v1.0/0cb84d3d-4f5d-4afc-9c49-de567f2ffbcb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-sft-v1.0/1762652579.507553",
- "retrieved_timestamp": "1762652579.507553",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Columbia-NLP/LION-Gemma-2b-sft-v1.0",
- "developer": "Columbia-NLP",
- "inference_platform": "unknown",
- "id": "Columbia-NLP/LION-Gemma-2b-sft-v1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3692469314751526
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.387877927616119
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2558724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4027395833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17819148936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 2.506
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-LLaMA-3-8b-dpo-v1.0/bf83f2be-f684-4ba7-b244-c5cb10f8f0b1.json b/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-LLaMA-3-8b-dpo-v1.0/bf83f2be-f684-4ba7-b244-c5cb10f8f0b1.json
deleted file mode 100644
index c8e5a76262fedda51408c00187d6a7f59d5848a6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-LLaMA-3-8b-dpo-v1.0/bf83f2be-f684-4ba7-b244-c5cb10f8f0b1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-LLaMA-3-8b-dpo-v1.0/1762652579.5077918",
- "retrieved_timestamp": "1762652579.507793",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0",
- "developer": "Columbia-NLP",
- "inference_platform": "unknown",
- "id": "Columbia-NLP/LION-LLaMA-3-8b-dpo-v1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4957424079220912
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5028481044452986
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11706948640483383
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40971874999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3218916223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-LLaMA-3-8b-sft-v1.0/23c9a71d-3504-497d-a0e2-6a5e299346e5.json b/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-LLaMA-3-8b-sft-v1.0/23c9a71d-3504-497d-a0e2-6a5e299346e5.json
deleted file mode 100644
index bd254e8cfa09764353339e38c29131a5b6074a2f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Columbia-NLP/Columbia-NLP_LION-LLaMA-3-8b-sft-v1.0/23c9a71d-3504-497d-a0e2-6a5e299346e5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-LLaMA-3-8b-sft-v1.0/1762652579.5082712",
- "retrieved_timestamp": "1762652579.5082722",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0",
- "developer": "Columbia-NLP",
- "inference_platform": "unknown",
- "id": "Columbia-NLP/LION-LLaMA-3-8b-sft-v1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38171163623629745
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5087766443418147
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11404833836858005
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45027083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32372007978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CombinHorizon/CombinHorizon_YiSM-blossom5.1-34B-SLERP/91ec4ba1-6948-48e8-8db0-a335b982c560.json b/leaderboard_data/HFOpenLLMv2/CombinHorizon/CombinHorizon_YiSM-blossom5.1-34B-SLERP/91ec4ba1-6948-48e8-8db0-a335b982c560.json
deleted file mode 100644
index 5fdc46d788dc4383159990d8837a5c8a6d4416bd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CombinHorizon/CombinHorizon_YiSM-blossom5.1-34B-SLERP/91ec4ba1-6948-48e8-8db0-a335b982c560.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CombinHorizon_YiSM-blossom5.1-34B-SLERP/1762652579.508977",
- "retrieved_timestamp": "1762652579.508977",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CombinHorizon/YiSM-blossom5.1-34B-SLERP",
- "developer": "CombinHorizon",
- "inference_platform": "unknown",
- "id": "CombinHorizon/YiSM-blossom5.1-34B-SLERP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5033112142448702
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6207548093635428
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21525679758308158
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35570469798657717
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44134375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4740691489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ContactDoctor/ContactDoctor_Bio-Medical-3B-CoT-012025/4ad50c15-9b6d-40c8-b8ce-74253ecfe258.json b/leaderboard_data/HFOpenLLMv2/ContactDoctor/ContactDoctor_Bio-Medical-3B-CoT-012025/4ad50c15-9b6d-40c8-b8ce-74253ecfe258.json
deleted file mode 100644
index 056d276ca549a4c8893cf755c7a9f75aacbcebc8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ContactDoctor/ContactDoctor_Bio-Medical-3B-CoT-012025/4ad50c15-9b6d-40c8-b8ce-74253ecfe258.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ContactDoctor_Bio-Medical-3B-CoT-012025/1762652579.509939",
- "retrieved_timestamp": "1762652579.509939",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ContactDoctor/Bio-Medical-3B-CoT-012025",
- "developer": "ContactDoctor",
- "inference_platform": "unknown",
- "id": "ContactDoctor/Bio-Medical-3B-CoT-012025"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.360379349016166
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.438315337642466
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2212990936555891
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3367604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2933843085106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.085
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Corianas/Corianas_Quokka_2.7b/54015982-408c-469b-86da-6642f5708180.json b/leaderboard_data/HFOpenLLMv2/Corianas/Corianas_Quokka_2.7b/54015982-408c-469b-86da-6642f5708180.json
deleted file mode 100644
index 49f57e5e2f1bf544bc41a789b5843e366f85e2f2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Corianas/Corianas_Quokka_2.7b/54015982-408c-469b-86da-6642f5708180.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Corianas_Quokka_2.7b/1762652579.5120142",
- "retrieved_timestamp": "1762652579.512015",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Corianas/Quokka_2.7b",
- "developer": "Corianas",
- "inference_platform": "unknown",
- "id": "Corianas/Quokka_2.7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17490702447284318
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3055474937424842
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.008308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2558724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3908333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11452792553191489
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GPT2LMHeadModel",
- "params_billions": 2.786
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CortexLM/CortexLM_btlm-7b-base-v0.2/aded7428-1283-4ed8-b068-cc1a5ea92dca.json b/leaderboard_data/HFOpenLLMv2/CortexLM/CortexLM_btlm-7b-base-v0.2/aded7428-1283-4ed8-b068-cc1a5ea92dca.json
deleted file mode 100644
index 5be4a7ab929338bf9d868f7343863a72532774a1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CortexLM/CortexLM_btlm-7b-base-v0.2/aded7428-1283-4ed8-b068-cc1a5ea92dca.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CortexLM_btlm-7b-base-v0.2/1762652579.512528",
- "retrieved_timestamp": "1762652579.512528",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CortexLM/btlm-7b-base-v0.2",
- "developer": "CortexLM",
- "inference_platform": "unknown",
- "id": "CortexLM/btlm-7b-base-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14832865685270635
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4006411985841813
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015105740181268883
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38460416666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2349567819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.885
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_SCE-2-24B/f4ff02eb-7763-41bc-8a86-adbb051603af.json b/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_SCE-2-24B/f4ff02eb-7763-41bc-8a86-adbb051603af.json
deleted file mode 100644
index 81adea72eb908d127880b8667dcce93ac56b0d3e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_SCE-2-24B/f4ff02eb-7763-41bc-8a86-adbb051603af.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Cran-May_SCE-2-24B/1762652579.512776",
- "retrieved_timestamp": "1762652579.5127769",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Cran-May/SCE-2-24B",
- "developer": "Cran-May",
- "inference_platform": "unknown",
- "id": "Cran-May/SCE-2-24B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5865924635522636
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6264692798019763
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18957703927492447
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.337248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4528125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.461186835106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 23.572
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_SCE-3-24B/2d7b9092-a9ad-4f47-b186-db1e1ce7cd6c.json b/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_SCE-3-24B/2d7b9092-a9ad-4f47-b186-db1e1ce7cd6c.json
deleted file mode 100644
index 395ba6542d8133158852baa4cbc3b4d02811d645..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_SCE-3-24B/2d7b9092-a9ad-4f47-b186-db1e1ce7cd6c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Cran-May_SCE-3-24B/1762652579.513022",
- "retrieved_timestamp": "1762652579.513023",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Cran-May/SCE-3-24B",
- "developer": "Cran-May",
- "inference_platform": "unknown",
- "id": "Cran-May/SCE-3-24B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5465254413844156
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.597283045074691
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18806646525679757
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3464765100671141
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44347916666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4646775265957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 23.572
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_T.E-8.1/9c9e0887-5561-4789-9521-a3a78e7cfd99.json b/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_T.E-8.1/9c9e0887-5561-4789-9521-a3a78e7cfd99.json
deleted file mode 100644
index fba7e4e44a0f506b5118c6c27c0c9fc7df45a576..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_T.E-8.1/9c9e0887-5561-4789-9521-a3a78e7cfd99.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Cran-May_T.E-8.1/1762652579.513231",
- "retrieved_timestamp": "1762652579.513231",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Cran-May/T.E-8.1",
- "developer": "Cran-May",
- "inference_platform": "unknown",
- "id": "Cran-May/T.E-8.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7076922565459647
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5581754708123893
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44561933534743203
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4505208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4432347074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_merge_model_20250308_2/c457473c-6c40-4930-94b8-993d3b1e8937.json b/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_merge_model_20250308_2/c457473c-6c40-4930-94b8-993d3b1e8937.json
deleted file mode 100644
index cad42a0b7693f6f38a24913cbea13c3b4456ee4e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_merge_model_20250308_2/c457473c-6c40-4930-94b8-993d3b1e8937.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Cran-May_merge_model_20250308_2/1762652579.51357",
- "retrieved_timestamp": "1762652579.5135732",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Cran-May/merge_model_20250308_2",
- "developer": "Cran-May",
- "inference_platform": "unknown",
- "id": "Cran-May/merge_model_20250308_2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5932370554572978
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6585311075974459
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4380664652567976
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39093959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4793541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5419714095744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_merge_model_20250308_3/5448dbb6-9874-4734-8252-369c7b0189d7.json b/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_merge_model_20250308_3/5448dbb6-9874-4734-8252-369c7b0189d7.json
deleted file mode 100644
index 83ef5d528ff3ae4727d67b387717199039c252b9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_merge_model_20250308_3/5448dbb6-9874-4734-8252-369c7b0189d7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Cran-May_merge_model_20250308_3/1762652579.513911",
- "retrieved_timestamp": "1762652579.513912",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Cran-May/merge_model_20250308_3",
- "developer": "Cran-May",
- "inference_platform": "unknown",
- "id": "Cran-May/merge_model_20250308_3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6017799438822324
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6271459892225041
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2545317220543807
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3221476510067114
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43204166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49617686170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_merge_model_20250308_4/45531924-35ad-4baf-9994-5d5fa3bafd02.json b/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_merge_model_20250308_4/45531924-35ad-4baf-9994-5d5fa3bafd02.json
deleted file mode 100644
index 1b6f658741a0f47f7ae5fd2c1548ce0f18ba42b2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_merge_model_20250308_4/45531924-35ad-4baf-9994-5d5fa3bafd02.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Cran-May_merge_model_20250308_4/1762652579.514166",
- "retrieved_timestamp": "1762652579.514167",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Cran-May/merge_model_20250308_4",
- "developer": "Cran-May",
- "inference_platform": "unknown",
- "id": "Cran-May/merge_model_20250308_4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4539521802151624
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.666435217186487
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4199395770392749
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3976510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4688125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5366522606382979
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_tempmotacilla-cinerea-0308/5e5e70f4-c597-415c-ab74-17aaf55b7b28.json b/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_tempmotacilla-cinerea-0308/5e5e70f4-c597-415c-ab74-17aaf55b7b28.json
deleted file mode 100644
index a0a8a2f33bb6037e05930b79efcb82f5a5c26121..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Cran-May/Cran-May_tempmotacilla-cinerea-0308/5e5e70f4-c597-415c-ab74-17aaf55b7b28.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Cran-May_tempmotacilla-cinerea-0308/1762652579.514418",
- "retrieved_timestamp": "1762652579.5144188",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Cran-May/tempmotacilla-cinerea-0308",
- "developer": "Cran-May",
- "inference_platform": "unknown",
- "id": "Cran-May/tempmotacilla-cinerea-0308"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8084837121061007
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6550960569488126
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5551359516616314
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3624161073825503
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42082291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5250166223404256
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CultriX/CultriX_Qwen2.5-14B-Wernicke-SFT/84bc884e-29be-40b5-bfe2-6147bec90a78.json b/leaderboard_data/HFOpenLLMv2/CultriX/CultriX_Qwen2.5-14B-Wernicke-SFT/84bc884e-29be-40b5-bfe2-6147bec90a78.json
deleted file mode 100644
index 8e4e919a1ee211b27e694ac6040dbd345108527f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CultriX/CultriX_Qwen2.5-14B-Wernicke-SFT/84bc884e-29be-40b5-bfe2-6147bec90a78.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernicke-SFT/1762652579.520046",
- "retrieved_timestamp": "1762652579.5200472",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-Wernicke-SFT",
- "developer": "CultriX",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-Wernicke-SFT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4937443760333692
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6460586236565512
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3595166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3540268456375839
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38999999999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5069813829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/CultriX/CultriX_Qwestion-14B/c6ad96f2-fcb9-47c5-8106-936436b6ad1b.json b/leaderboard_data/HFOpenLLMv2/CultriX/CultriX_Qwestion-14B/c6ad96f2-fcb9-47c5-8106-936436b6ad1b.json
deleted file mode 100644
index 316cecff4dfa2ca69d6900ce51cfae17e47c3d2f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/CultriX/CultriX_Qwestion-14B/c6ad96f2-fcb9-47c5-8106-936436b6ad1b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwestion-14B/1762652579.521322",
- "retrieved_timestamp": "1762652579.521322",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwestion-14B",
- "developer": "CultriX",
- "inference_platform": "unknown",
- "id": "CultriX/Qwestion-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6317803428237078
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6450104739140539
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3723564954682779
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36828859060402686
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46360416666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.542220744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DRXD1000/DRXD1000_Atlas-7B/1f223500-a1d6-471f-b3cf-2575ab5a52c8.json b/leaderboard_data/HFOpenLLMv2/DRXD1000/DRXD1000_Atlas-7B/1f223500-a1d6-471f-b3cf-2575ab5a52c8.json
deleted file mode 100644
index 0f88fcd87a4a2f2f8b62d5f807463cd99799f093..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DRXD1000/DRXD1000_Atlas-7B/1f223500-a1d6-471f-b3cf-2575ab5a52c8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DRXD1000_Atlas-7B/1762652579.5232708",
- "retrieved_timestamp": "1762652579.523272",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DRXD1000/Atlas-7B",
- "developer": "DRXD1000",
- "inference_platform": "unknown",
- "id": "DRXD1000/Atlas-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3704459722425387
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3302176697760134
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0188821752265861
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33425
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14012632978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 7.768
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DRXD1000/DRXD1000_Phoenix-7B/bff80553-e91f-470e-923c-7f8103d37fca.json b/leaderboard_data/HFOpenLLMv2/DRXD1000/DRXD1000_Phoenix-7B/bff80553-e91f-470e-923c-7f8103d37fca.json
deleted file mode 100644
index d99e78a451437454865f9532d3dbdb377776785b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DRXD1000/DRXD1000_Phoenix-7B/bff80553-e91f-470e-923c-7f8103d37fca.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DRXD1000_Phoenix-7B/1762652579.5236301",
- "retrieved_timestamp": "1762652579.523632",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DRXD1000/Phoenix-7B",
- "developer": "DRXD1000",
- "inference_platform": "unknown",
- "id": "DRXD1000/Phoenix-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3209617149164218
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3931566034728218
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01661631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38494791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23429188829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DUAL-GPO/DUAL-GPO_zephyr-7b-ipo-0k-15k-i1/a4cd4144-75d5-4c48-a936-96d70f052a66.json b/leaderboard_data/HFOpenLLMv2/DUAL-GPO/DUAL-GPO_zephyr-7b-ipo-0k-15k-i1/a4cd4144-75d5-4c48-a936-96d70f052a66.json
deleted file mode 100644
index 57113a6376a3e9aa03b34cf1b888681801225945..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DUAL-GPO/DUAL-GPO_zephyr-7b-ipo-0k-15k-i1/a4cd4144-75d5-4c48-a936-96d70f052a66.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DUAL-GPO_zephyr-7b-ipo-0k-15k-i1/1762652579.523929",
- "retrieved_timestamp": "1762652579.52393",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DUAL-GPO/zephyr-7b-ipo-0k-15k-i1",
- "developer": "DUAL-GPO",
- "inference_platform": "unknown",
- "id": "DUAL-GPO/zephyr-7b-ipo-0k-15k-i1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27562423259174545
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4472712447565954
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.030211480362537766
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41734374999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31299867021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "?",
- "params_billions": 14.483
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DZgas/DZgas_GIGABATEMAN-7B/180be3a9-1d8e-4705-bda4-032bc66768c6.json b/leaderboard_data/HFOpenLLMv2/DZgas/DZgas_GIGABATEMAN-7B/180be3a9-1d8e-4705-bda4-032bc66768c6.json
deleted file mode 100644
index 100874c4a213c04812f411c6e9a3c39a10ac5c6f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DZgas/DZgas_GIGABATEMAN-7B/180be3a9-1d8e-4705-bda4-032bc66768c6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DZgas_GIGABATEMAN-7B/1762652579.524226",
- "retrieved_timestamp": "1762652579.5242271",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DZgas/GIGABATEMAN-7B",
- "developer": "DZgas",
- "inference_platform": "unknown",
- "id": "DZgas/GIGABATEMAN-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46074637517342876
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5032184342862756
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43284374999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3176529255319149
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherDrake-SFT/843cbaa0-5d9d-47a8-ae69-fe38a5812136.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherDrake-SFT/843cbaa0-5d9d-47a8-ae69-fe38a5812136.json
deleted file mode 100644
index 89499bf6d98803a34f3a74e6a001c076f1b5b6ec..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherDrake-SFT/843cbaa0-5d9d-47a8-ae69-fe38a5812136.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_AetherDrake-SFT/1762652579.524555",
- "retrieved_timestamp": "1762652579.524556",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/AetherDrake-SFT",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/AetherDrake-SFT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4812796712722244
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48720075507220245
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1510574018126888
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40884375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34990026595744683
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherSett/791a8f9f-5c85-42e5-a06d-270118b0c7c2.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherSett/791a8f9f-5c85-42e5-a06d-270118b0c7c2.json
deleted file mode 100644
index 4bb4a855620b37eb124a5122be1543b2f1c6ee17..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherSett/791a8f9f-5c85-42e5-a06d-270118b0c7c2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_AetherSett/1762652579.524883",
- "retrieved_timestamp": "1762652579.524884",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/AetherSett",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/AetherSett"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5369586031729146
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5451624435465484
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3972809667673716
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46031249999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4278590425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherTOT/8ac4547d-2b57-4227-a63d-05da4f3ccbc7.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherTOT/8ac4547d-2b57-4227-a63d-05da4f3ccbc7.json
deleted file mode 100644
index f5bc708c8f7e8e7ec69aaad1cc723bcc1dbf12b5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherTOT/8ac4547d-2b57-4227-a63d-05da4f3ccbc7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_AetherTOT/1762652579.5251331",
- "retrieved_timestamp": "1762652579.5251389",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/AetherTOT",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/AetherTOT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4397642699149368
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5066056342472064
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1487915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4078541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38040226063829785
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MllamaForConditionalGeneration",
- "params_billions": 10.67
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherTOT/fa9282c6-7820-49dd-9893-9559c5a984a9.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherTOT/fa9282c6-7820-49dd-9893-9559c5a984a9.json
deleted file mode 100644
index 1d63e8c1c32df0f3066a2879f87dd5e608ca6d61..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherTOT/fa9282c6-7820-49dd-9893-9559c5a984a9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_AetherTOT/1762652579.5253801",
- "retrieved_timestamp": "1762652579.525381",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/AetherTOT",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/AetherTOT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43829040279790954
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5034307630533988
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14425981873111782
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40518750000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37782579787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MllamaForConditionalGeneration",
- "params_billions": 10.67
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherUncensored/574d79eb-94ae-4b79-8763-77267d300670.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherUncensored/574d79eb-94ae-4b79-8763-77267d300670.json
deleted file mode 100644
index 39f20c89e4731958b63949eba8b391f6833a6456..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_AetherUncensored/574d79eb-94ae-4b79-8763-77267d300670.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_AetherUncensored/1762652579.525634",
- "retrieved_timestamp": "1762652579.5256362",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/AetherUncensored",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/AetherUncensored"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40419309653940433
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44631282805144945
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14501510574018128
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28859060402684567
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3746770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27102726063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Cogito-MIS/822268e0-8f66-4bb3-9d01-52c684ca281f.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Cogito-MIS/822268e0-8f66-4bb3-9d01-52c684ca281f.json
deleted file mode 100644
index a56880a092b5aea3e976890c100a96814c526f33..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Cogito-MIS/822268e0-8f66-4bb3-9d01-52c684ca281f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_Cogito-MIS/1762652579.525943",
- "retrieved_timestamp": "1762652579.5259452",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/Cogito-MIS",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/Cogito-MIS"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18145188100905596
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5059981143086196
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08610271903323263
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25671140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37676041666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14353390957446807
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 23.572
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_CogitoDistil/f39e1ca4-2a0f-4650-886b-4160760daee5.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_CogitoDistil/f39e1ca4-2a0f-4650-886b-4160760daee5.json
deleted file mode 100644
index 9cf24eb5641db9ed036dc4ab0798e950bb3fbc0e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_CogitoDistil/f39e1ca4-2a0f-4650-886b-4160760daee5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_CogitoDistil/1762652579.526295",
- "retrieved_timestamp": "1762652579.5262961",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/CogitoDistil",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/CogitoDistil"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27764775240805506
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36767660461416857
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39274924471299094
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3754895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625498670212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_CogitoZ/5e08074c-32bd-4ce6-a09f-7b5832cba288.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_CogitoZ/5e08074c-32bd-4ce6-a09f-7b5832cba288.json
deleted file mode 100644
index 08c0282b1c942ac8758e5abb1c40bfc9e7a06445..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_CogitoZ/5e08074c-32bd-4ce6-a09f-7b5832cba288.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_CogitoZ/1762652579.5265448",
- "retrieved_timestamp": "1762652579.526546",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/CogitoZ",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/CogitoZ"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3967240255854466
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6734487392645502
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5241691842900302
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3951342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4792604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5592586436170213
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_CogitoZ14/024f23d8-66b0-4a7b-be01-fd68f0ab295e.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_CogitoZ14/024f23d8-66b0-4a7b-be01-fd68f0ab295e.json
deleted file mode 100644
index c7e763d4700efce4bd84ba7a33c16db11cd37854..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_CogitoZ14/024f23d8-66b0-4a7b-be01-fd68f0ab295e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_CogitoZ14/1762652579.526777",
- "retrieved_timestamp": "1762652579.5267782",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/CogitoZ14",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/CogitoZ14"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6637034180419066
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6297514788808327
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42220543806646527
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.405875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39993351063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_DocumentCogito/6d2a742b-adde-4b6d-90d4-ebefbb2b61be.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_DocumentCogito/6d2a742b-adde-4b6d-90d4-ebefbb2b61be.json
deleted file mode 100644
index 44b579dd52cc4e8a6a0ee8cc69e120c814d553c2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_DocumentCogito/6d2a742b-adde-4b6d-90d4-ebefbb2b61be.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_DocumentCogito/1762652579.5270069",
- "retrieved_timestamp": "1762652579.527008",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/DocumentCogito",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/DocumentCogito"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5064340394597445
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5111563719111275
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16314199395770393
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3973125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38023603723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MllamaForConditionalGeneration",
- "params_billions": 10.67
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_DocumentCogito/9a638bb6-f16f-496b-a974-d97dbb6cd626.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_DocumentCogito/9a638bb6-f16f-496b-a974-d97dbb6cd626.json
deleted file mode 100644
index 8aeeac07f4283abb0ba9c5b411473ff6f15ed7e0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_DocumentCogito/9a638bb6-f16f-496b-a974-d97dbb6cd626.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_DocumentCogito/1762652579.527227",
- "retrieved_timestamp": "1762652579.5272279",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/DocumentCogito",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/DocumentCogito"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7770349339751859
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5186726621665779
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21978851963746224
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39105208333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3737533244680851
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MllamaForConditionalGeneration",
- "params_billions": 10.67
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_MawaredT1/1e87d1ea-59df-4c1a-96da-31e12e27dab2.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_MawaredT1/1e87d1ea-59df-4c1a-96da-31e12e27dab2.json
deleted file mode 100644
index da1bc48b891dbc347a9360ab36f455299a2e8800..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_MawaredT1/1e87d1ea-59df-4c1a-96da-31e12e27dab2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_MawaredT1/1762652579.527918",
- "retrieved_timestamp": "1762652579.527919",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/MawaredT1",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/MawaredT1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41988036188424493
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5214815439293661
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3021148036253776
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3347315436241611
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47020833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4718251329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Mini_QwQ/7d5c59eb-c6fb-414a-9e4e-44d1d56f7401.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Mini_QwQ/7d5c59eb-c6fb-414a-9e4e-44d1d56f7401.json
deleted file mode 100644
index 8aebd643552d63888a42a70978e497b702cecf36..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Mini_QwQ/7d5c59eb-c6fb-414a-9e4e-44d1d56f7401.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_Mini_QwQ/1762652579.528199",
- "retrieved_timestamp": "1762652579.5282",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/Mini_QwQ",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/Mini_QwQ"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44970566984490046
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.554898906584336
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41918429003021146
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46825
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.437250664893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_NemoR/a2da90e0-5f59-4c89-b819-316d2cc318be.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_NemoR/a2da90e0-5f59-4c89-b819-316d2cc318be.json
deleted file mode 100644
index c6d74e255b6ba94778859b164a725213eeeaf87f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_NemoR/a2da90e0-5f59-4c89-b819-316d2cc318be.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_NemoR/1762652579.528459",
- "retrieved_timestamp": "1762652579.528459",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/NemoR",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/NemoR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2287375275380435
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5194067688446361
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3271812080536913
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39080208333333327
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32903922872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 6.124
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathFinderAI2.0/274ab6b9-5fd7-41df-9076-b16c52947640.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathFinderAI2.0/274ab6b9-5fd7-41df-9076-b16c52947640.json
deleted file mode 100644
index 7669e28dc235ca3b807becfae22194e216c01b46..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathFinderAI2.0/274ab6b9-5fd7-41df-9076-b16c52947640.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_PathFinderAI2.0/1762652579.528686",
- "retrieved_timestamp": "1762652579.528686",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/PathFinderAI2.0",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/PathFinderAI2.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45410178326839457
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.665823006477417
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5075528700906344
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4215625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5546875
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathFinderAi3.0/ba3924c6-f913-4094-a56a-1699f07f103c.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathFinderAi3.0/ba3924c6-f913-4094-a56a-1699f07f103c.json
deleted file mode 100644
index a6c8bdb24ac1f388b3dcb809763d6f511f02f5b9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathFinderAi3.0/ba3924c6-f913-4094-a56a-1699f07f103c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_PathFinderAi3.0/1762652579.5289202",
- "retrieved_timestamp": "1762652579.5289202",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/PathFinderAi3.0",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/PathFinderAi3.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42709898624538445
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6884221416328996
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5045317220543807
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4085570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4806875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5757147606382979
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathfinderAI/445f2c79-2c47-465c-ace7-73b3fa491454.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathfinderAI/445f2c79-2c47-465c-ace7-73b3fa491454.json
deleted file mode 100644
index 9badb0279a37da0abd6f279a2886bb475b1850e8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathfinderAI/445f2c79-2c47-465c-ace7-73b3fa491454.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_PathfinderAI/1762652579.529176",
- "retrieved_timestamp": "1762652579.5291772",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/PathfinderAI",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/PathfinderAI"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37451739163198094
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6667854331232542
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47583081570996977
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39429530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48583333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.559341755319149
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathfinderAI/c07f2943-f3f4-46be-993e-be56dadcb561.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathfinderAI/c07f2943-f3f4-46be-993e-be56dadcb561.json
deleted file mode 100644
index ac333a09b04eebbb8ac83590878a522b9325679c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PathfinderAI/c07f2943-f3f4-46be-993e-be56dadcb561.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_PathfinderAI/1762652579.5294342",
- "retrieved_timestamp": "1762652579.529435",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/PathfinderAI",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/PathfinderAI"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4855006937148987
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6627335380624046
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48413897280966767
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42559375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.554188829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PixelParse_AI/29459932-a7a5-458f-9778-e236cc4ea985.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PixelParse_AI/29459932-a7a5-458f-9778-e236cc4ea985.json
deleted file mode 100644
index aa668632a96e425e81313f49787c4662a928c701..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_PixelParse_AI/29459932-a7a5-458f-9778-e236cc4ea985.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_PixelParse_AI/1762652579.529871",
- "retrieved_timestamp": "1762652579.529872",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/PixelParse_AI",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/PixelParse_AI"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43829040279790954
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5034307630533988
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1472809667673716
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40518750000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37782579787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MllamaForConditionalGeneration",
- "params_billions": 10.67
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_RA2.0/3baf9882-5625-47eb-a88b-b172dfc9a330.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_RA2.0/3baf9882-5625-47eb-a88b-b172dfc9a330.json
deleted file mode 100644
index 51186af7249f1e821a4b000f8f773e47be35c058..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_RA2.0/3baf9882-5625-47eb-a88b-b172dfc9a330.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_RA2.0/1762652579.53008",
- "retrieved_timestamp": "1762652579.530081",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/RA2.0",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/RA2.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37838934028378035
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4888687006782508
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38368580060422963
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40912499999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26163563829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_RA_Reasoner/ab74d5ca-6c80-44de-96e9-af61861090b6.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_RA_Reasoner/ab74d5ca-6c80-44de-96e9-af61861090b6.json
deleted file mode 100644
index b1f4c3ee011c7239750543b2b8c11f26c94a4304..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_RA_Reasoner/ab74d5ca-6c80-44de-96e9-af61861090b6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_RA_Reasoner/1762652579.530283",
- "retrieved_timestamp": "1762652579.530284",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/RA_Reasoner",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/RA_Reasoner"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.559215104810791
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6053692417205033
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2122356495468278
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3963541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43001994680851063
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.306
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_RA_Reasoner2.0/5cf9872a-6d67-4b42-bfe4-abad05bdd9cf.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_RA_Reasoner2.0/5cf9872a-6d67-4b42-bfe4-abad05bdd9cf.json
deleted file mode 100644
index e5a5769990591bf5eac130471bb3aded5ea3c89a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_RA_Reasoner2.0/5cf9872a-6d67-4b42-bfe4-abad05bdd9cf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_RA_Reasoner2.0/1762652579.530484",
- "retrieved_timestamp": "1762652579.530485",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/RA_Reasoner2.0",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/RA_Reasoner2.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5366339091388627
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6062469551969276
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2311178247734139
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3883541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4353390957446808
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.306
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_ReasonTest/39d481bf-ea86-42a7-a6f1-ce38ce9dce30.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_ReasonTest/39d481bf-ea86-42a7-a6f1-ce38ce9dce30.json
deleted file mode 100644
index 2eae2e44d4fec5070c7fd4fe19c2eb0054ed1016..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_ReasonTest/39d481bf-ea86-42a7-a6f1-ce38ce9dce30.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_ReasonTest/1762652579.530685",
- "retrieved_timestamp": "1762652579.530686",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/ReasonTest",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/ReasonTest"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4079653098223824
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.543526397621609
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21374622356495468
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3187919463087248
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43154166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4271941489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.808
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Research_PathfinderAI/900e5686-083d-460c-918f-06a39936810c.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Research_PathfinderAI/900e5686-083d-460c-918f-06a39936810c.json
deleted file mode 100644
index 57dbc59cbdd0d4e1448f354195d071a3734c4a5b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Research_PathfinderAI/900e5686-083d-460c-918f-06a39936810c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_Research_PathfinderAI/1762652579.530894",
- "retrieved_timestamp": "1762652579.530895",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/Research_PathfinderAI",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/Research_PathfinderAI"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3456916537010687
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287225755504323
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16993957703927492
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2407718120805369
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33939583333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11303191489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Zirel-7B-Math/460de6c8-d706-420b-9c0a-a108ddb11e5f.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Zirel-7B-Math/460de6c8-d706-420b-9c0a-a108ddb11e5f.json
deleted file mode 100644
index 6abcd7894505d59eecadbe11dbcdee1e3c04fa3a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Zirel-7B-Math/460de6c8-d706-420b-9c0a-a108ddb11e5f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_Zirel-7B-Math/1762652579.531958",
- "retrieved_timestamp": "1762652579.531959",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/Zirel-7B-Math",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/Zirel-7B-Math"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6638785090227264
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5447698777469486
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19788519637462235
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3263422818791946
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47891666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4237034574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Zirel_1.5/661e2393-2560-4d25-a6f3-f0d680052e8e.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Zirel_1.5/661e2393-2560-4d25-a6f3-f0d680052e8e.json
deleted file mode 100644
index 44e7be9b8700430174c750f97faedbad79929f10..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_Zirel_1.5/661e2393-2560-4d25-a6f3-f0d680052e8e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_Zirel_1.5/1762652579.532257",
- "retrieved_timestamp": "1762652579.532258",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/Zirel_1.5",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/Zirel_1.5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4167575366693706
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3984669254999634
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11329305135951662
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36581250000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21434507978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_mini-Cogito-R1/faac8ed1-1042-42dc-9762-3f90161fb34f.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_mini-Cogito-R1/faac8ed1-1042-42dc-9762-3f90161fb34f.json
deleted file mode 100644
index c050631b8b4e8fd0e517de6e6ce45d6cab02afaa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_mini-Cogito-R1/faac8ed1-1042-42dc-9762-3f90161fb34f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_mini-Cogito-R1/1762652579.532486",
- "retrieved_timestamp": "1762652579.532487",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/mini-Cogito-R1",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/mini-Cogito-R1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2298368329366082
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3280491875175077
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27492447129909364
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34469791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14818816489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_mini_Pathfinder/a9afd0b3-8189-47e0-9e33-d60540679e20.json b/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_mini_Pathfinder/a9afd0b3-8189-47e0-9e33-d60540679e20.json
deleted file mode 100644
index 6dbd700d9e57495ec735787cbcf88c0398d5432f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Daemontatox/Daemontatox_mini_Pathfinder/a9afd0b3-8189-47e0-9e33-d60540679e20.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Daemontatox_mini_Pathfinder/1762652579.53272",
- "retrieved_timestamp": "1762652579.5327208",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Daemontatox/mini_Pathfinder",
- "developer": "Daemontatox",
- "inference_platform": "unknown",
- "id": "Daemontatox/mini_Pathfinder"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29615752869054107
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39556911910803755
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47507552870090636
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37809374999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28091755319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Dampfinchen/Dampfinchen_Llama-3.1-8B-Ultra-Instruct/2a0d23aa-47ae-4974-ac64-5371097a1b0f.json b/leaderboard_data/HFOpenLLMv2/Dampfinchen/Dampfinchen_Llama-3.1-8B-Ultra-Instruct/2a0d23aa-47ae-4974-ac64-5371097a1b0f.json
deleted file mode 100644
index 4fdc019de8e0304a878d768bf9f1e5220f0f5a45..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Dampfinchen/Dampfinchen_Llama-3.1-8B-Ultra-Instruct/2a0d23aa-47ae-4974-ac64-5371097a1b0f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Dampfinchen_Llama-3.1-8B-Ultra-Instruct/1762652579.532935",
- "retrieved_timestamp": "1762652579.532935",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Dampfinchen/Llama-3.1-8B-Ultra-Instruct",
- "developer": "Dampfinchen",
- "inference_platform": "unknown",
- "id": "Dampfinchen/Llama-3.1-8B-Ultra-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8081091503876381
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5257532452246574
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22054380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40032291666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.382563164893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Danielbrdz/Danielbrdz_Barcenas-10b/acdaefdc-b28c-4081-bf72-517d6c70595e.json b/leaderboard_data/HFOpenLLMv2/Danielbrdz/Danielbrdz_Barcenas-10b/acdaefdc-b28c-4081-bf72-517d6c70595e.json
deleted file mode 100644
index 07da5b577a7f97ba31620c44e21599c33d93fd00..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Danielbrdz/Danielbrdz_Barcenas-10b/acdaefdc-b28c-4081-bf72-517d6c70595e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-10b/1762652579.533203",
- "retrieved_timestamp": "1762652579.533203",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Danielbrdz/Barcenas-10b",
- "developer": "Danielbrdz",
- "inference_platform": "unknown",
- "id": "Danielbrdz/Barcenas-10b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6607811717354397
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6120828494270083
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21525679758308158
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3414429530201342
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41346875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4360871010638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.306
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Danielbrdz/Danielbrdz_Barcenas-3b-GRPO/88a3b40a-3ba2-4f13-bd8c-110872d807c7.json b/leaderboard_data/HFOpenLLMv2/Danielbrdz/Danielbrdz_Barcenas-3b-GRPO/88a3b40a-3ba2-4f13-bd8c-110872d807c7.json
deleted file mode 100644
index da2f614a900a19abf1fde269a17668205a6a3f98..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Danielbrdz/Danielbrdz_Barcenas-3b-GRPO/88a3b40a-3ba2-4f13-bd8c-110872d807c7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-3b-GRPO/1762652579.534181",
- "retrieved_timestamp": "1762652579.5341818",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Danielbrdz/Barcenas-3b-GRPO",
- "developer": "Danielbrdz",
- "inference_platform": "unknown",
- "id": "Danielbrdz/Barcenas-3b-GRPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5444276741268723
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44143515175110304
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13746223564954682
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35759375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036901595744681
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_12b-mn-dans-reasoning-test-2/bd21f54f-6b0c-4db9-bb46-7a4c60f960ae.json b/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_12b-mn-dans-reasoning-test-2/bd21f54f-6b0c-4db9-bb46-7a4c60f960ae.json
deleted file mode 100644
index 6074f9aa8ac0ab6f596ac9194ff17d257334fd46..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_12b-mn-dans-reasoning-test-2/bd21f54f-6b0c-4db9-bb46-7a4c60f960ae.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_12b-mn-dans-reasoning-test-2/1762652579.534956",
- "retrieved_timestamp": "1762652579.5349572",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Dans-DiscountModels/12b-mn-dans-reasoning-test-2",
- "developer": "Dans-DiscountModels",
- "inference_platform": "unknown",
- "id": "Dans-DiscountModels/12b-mn-dans-reasoning-test-2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3710953603106424
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48070333147041405
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0634441087613293
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37021875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2507480053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_12b-mn-dans-reasoning-test-3/c9dedad4-65d4-479e-b465-912cd8885e32.json b/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_12b-mn-dans-reasoning-test-3/c9dedad4-65d4-479e-b465-912cd8885e32.json
deleted file mode 100644
index 4348b94528907fad2837cb756cced3e3c9e7249b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_12b-mn-dans-reasoning-test-3/c9dedad4-65d4-479e-b465-912cd8885e32.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_12b-mn-dans-reasoning-test-3/1762652579.535208",
- "retrieved_timestamp": "1762652579.535209",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Dans-DiscountModels/12b-mn-dans-reasoning-test-3",
- "developer": "Dans-DiscountModels",
- "inference_platform": "unknown",
- "id": "Dans-DiscountModels/12b-mn-dans-reasoning-test-3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5052593784491815
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48388753289945696
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07779456193353475
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4167604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2515791223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-CoreCurriculum-12b-ChatML/6b61018c-249d-482b-a787-06f1e6514f29.json b/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-CoreCurriculum-12b-ChatML/6b61018c-249d-482b-a787-06f1e6514f29.json
deleted file mode 100644
index 1582ce7e8e062365d44a8e566bfc078de41d5eb0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-CoreCurriculum-12b-ChatML/6b61018c-249d-482b-a787-06f1e6514f29.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-CoreCurriculum-12b-ChatML/1762652579.535429",
- "retrieved_timestamp": "1762652579.53543",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML",
- "developer": "Dans-DiscountModels",
- "inference_platform": "unknown",
- "id": "Dans-DiscountModels/Dans-Instruct-CoreCurriculum-12b-ChatML"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21110209798889168
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4791864789096407
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04305135951661632
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3606354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2805019946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.0/9873b58d-1ffd-44a7-bb93-15038986419a.json b/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.0/9873b58d-1ffd-44a7-bb93-15038986419a.json
deleted file mode 100644
index f43831e00ae44449bb56486de942545d76fa00aa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.0/9873b58d-1ffd-44a7-bb93-15038986419a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.0/1762652579.5358772",
- "retrieved_timestamp": "1762652579.535878",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0",
- "developer": "Dans-DiscountModels",
- "inference_platform": "unknown",
- "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06682048076880455
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47747656219777285
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06722054380664652
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2860738255033557
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3785833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.328374335106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.1/71656625-cd85-49a6-a8df-abc0b9c0ae5d.json b/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.1/71656625-cd85-49a6-a8df-abc0b9c0ae5d.json
deleted file mode 100644
index 89fc7ebcd4231480ba4bfc8877b3a64f93bd8487..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.1/71656625-cd85-49a6-a8df-abc0b9c0ae5d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.1.1/1762652579.5360918",
- "retrieved_timestamp": "1762652579.5360918",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1",
- "developer": "Dans-DiscountModels",
- "inference_platform": "unknown",
- "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09105063453857985
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4748653313732898
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05966767371601209
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3824895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.327875664893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.2.0/d47dc284-0ed6-4853-8a54-b87b4b529150.json b/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.2.0/d47dc284-0ed6-4853-8a54-b87b4b529150.json
deleted file mode 100644
index 046914817ca86d1e41fbbc203e5fbf827a79f08c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.2.0/d47dc284-0ed6-4853-8a54-b87b4b529150.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML-V0.2.0/1762652579.536302",
- "retrieved_timestamp": "1762652579.536303",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0",
- "developer": "Dans-DiscountModels",
- "inference_platform": "unknown",
- "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML-V0.2.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5064085515321569
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4624263551503409
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07326283987915408
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3644479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2999501329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML/60db255b-d34c-4f33-91a4-279a9ccc6791.json b/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML/60db255b-d34c-4f33-91a4-279a9ccc6791.json
deleted file mode 100644
index e3694327583b19fbc5e0f71b3b98e73c0a1662d9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Dans-DiscountModels/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML/60db255b-d34c-4f33-91a4-279a9ccc6791.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Dans-DiscountModels_Dans-Instruct-Mix-8b-ChatML/1762652579.5356538",
- "retrieved_timestamp": "1762652579.535655",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML",
- "developer": "Dans-DiscountModels",
- "inference_platform": "unknown",
- "id": "Dans-DiscountModels/Dans-Instruct-Mix-8b-ChatML"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08250774611364513
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4738171816307924
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3918229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32878989361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Darkknight535/Darkknight535_OpenCrystal-12B-L3/8edb0a0d-994b-4b97-b9a7-7f46ba0e7365.json b/leaderboard_data/HFOpenLLMv2/Darkknight535/Darkknight535_OpenCrystal-12B-L3/8edb0a0d-994b-4b97-b9a7-7f46ba0e7365.json
deleted file mode 100644
index 973da37f6a722e4af835e781b0b30f8ca2e6cb63..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Darkknight535/Darkknight535_OpenCrystal-12B-L3/8edb0a0d-994b-4b97-b9a7-7f46ba0e7365.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Darkknight535_OpenCrystal-12B-L3/1762652579.5369642",
- "retrieved_timestamp": "1762652579.5369651",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Darkknight535/OpenCrystal-12B-L3",
- "developer": "Darkknight535",
- "inference_platform": "unknown",
- "id": "Darkknight535/OpenCrystal-12B-L3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4070909630890482
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5222598504945516
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08987915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36565625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3640292553191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 11.52
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-DARKEST-PLANET-16.5B/2c317db5-86fa-41fd-8f1e-3cf08ba91cde.json b/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-DARKEST-PLANET-16.5B/2c317db5-86fa-41fd-8f1e-3cf08ba91cde.json
deleted file mode 100644
index b3f9b01688524f477e3dbc6d3ca34b18a0b6a415..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-DARKEST-PLANET-16.5B/2c317db5-86fa-41fd-8f1e-3cf08ba91cde.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavidAU_L3-DARKEST-PLANET-16.5B/1762652579.540939",
- "retrieved_timestamp": "1762652579.54094",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavidAU/L3-DARKEST-PLANET-16.5B",
- "developer": "DavidAU",
- "inference_platform": "unknown",
- "id": "DavidAU/L3-DARKEST-PLANET-16.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6230623634179533
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5230436906708896
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08987915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3753645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.363031914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 16.537
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Dark-Planet-8B/f5c2a2cc-392e-4337-aad9-72d65ba87aab.json b/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Dark-Planet-8B/f5c2a2cc-392e-4337-aad9-72d65ba87aab.json
deleted file mode 100644
index 774dc40102833cfff9d5f3368300edbcd08287c6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Dark-Planet-8B/f5c2a2cc-392e-4337-aad9-72d65ba87aab.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavidAU_L3-Dark-Planet-8B/1762652579.5412621",
- "retrieved_timestamp": "1762652579.541263",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavidAU/L3-Dark-Planet-8B",
- "developer": "DavidAU",
- "inference_platform": "unknown",
- "id": "DavidAU/L3-Dark-Planet-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4134108609600305
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5084081453197787
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0823262839879154
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36159375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37367021276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Jamet-12.2B-MK.V-Blackroot-Instruct/85a1ef3f-7d68-4324-876d-b52cfa71317d.json b/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Jamet-12.2B-MK.V-Blackroot-Instruct/85a1ef3f-7d68-4324-876d-b52cfa71317d.json
deleted file mode 100644
index 73c09edee4d6b44f40073ca3cd5ba96e8aefa637..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Jamet-12.2B-MK.V-Blackroot-Instruct/85a1ef3f-7d68-4324-876d-b52cfa71317d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavidAU_L3-Jamet-12.2B-MK.V-Blackroot-Instruct/1762652579.541475",
- "retrieved_timestamp": "1762652579.541475",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct",
- "developer": "DavidAU",
- "inference_platform": "unknown",
- "id": "DavidAU/L3-Jamet-12.2B-MK.V-Blackroot-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3961998608137519
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4765717717789398
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04078549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40196875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3291223404255319
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 12.174
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Lumimaid-12.2B-v0.1-OAS-Instruct/a8fe768d-f988-4fba-be80-2f5cc22dfd9d.json b/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Lumimaid-12.2B-v0.1-OAS-Instruct/a8fe768d-f988-4fba-be80-2f5cc22dfd9d.json
deleted file mode 100644
index d1deb236f4eb03e544afecc0b8b78eb4b56bbc18..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Lumimaid-12.2B-v0.1-OAS-Instruct/a8fe768d-f988-4fba-be80-2f5cc22dfd9d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavidAU_L3-Lumimaid-12.2B-v0.1-OAS-Instruct/1762652579.541698",
- "retrieved_timestamp": "1762652579.5416992",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct",
- "developer": "DavidAU",
- "inference_platform": "unknown",
- "id": "DavidAU/L3-Lumimaid-12.2B-v0.1-OAS-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3924032677739509
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46930207579694677
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04607250755287009
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27684563758389263
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41942708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31416223404255317
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 12.174
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-SMB-Instruct-12.2B-F32/970cfd49-b72c-4cf5-af05-1ecfc57c94d8.json b/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-SMB-Instruct-12.2B-F32/970cfd49-b72c-4cf5-af05-1ecfc57c94d8.json
deleted file mode 100644
index 3f9b310d14bb302f5557a3bab7f3523e491422ec..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-SMB-Instruct-12.2B-F32/970cfd49-b72c-4cf5-af05-1ecfc57c94d8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavidAU_L3-SMB-Instruct-12.2B-F32/1762652579.541919",
- "retrieved_timestamp": "1762652579.54192",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavidAU/L3-SMB-Instruct-12.2B-F32",
- "developer": "DavidAU",
- "inference_platform": "unknown",
- "id": "DavidAU/L3-SMB-Instruct-12.2B-F32"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4303215468290802
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4786412360346213
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04682779456193353
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28187919463087246
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40872916666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3312001329787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 12.174
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/9dbf220a-cbe9-40da-814f-951205c3abbe.json b/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/9dbf220a-cbe9-40da-814f-951205c3abbe.json
deleted file mode 100644
index bc0f245cc6b872e493213f17007d87d86f19a305..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/9dbf220a-cbe9-40da-814f-951205c3abbe.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavidAU_L3-Stheno-Maid-Blackroot-Grand-HORROR-16B/1762652579.542142",
- "retrieved_timestamp": "1762652579.5421429",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B",
- "developer": "DavidAU",
- "inference_platform": "unknown",
- "id": "DavidAU/L3-Stheno-Maid-Blackroot-Grand-HORROR-16B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34389309254998957
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4736328900737677
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02190332326283988
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40311458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3570478723404255
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 16.537
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Stheno-v3.2-12.2B-Instruct/51566db6-56e4-40bd-a248-6c968f2b83e8.json b/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Stheno-v3.2-12.2B-Instruct/51566db6-56e4-40bd-a248-6c968f2b83e8.json
deleted file mode 100644
index 51769c270fad4b7ac981fc751ed1018861b5bfab..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3-Stheno-v3.2-12.2B-Instruct/51566db6-56e4-40bd-a248-6c968f2b83e8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavidAU_L3-Stheno-v3.2-12.2B-Instruct/1762652579.542359",
- "retrieved_timestamp": "1762652579.54236",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavidAU/L3-Stheno-v3.2-12.2B-Instruct",
- "developer": "DavidAU",
- "inference_platform": "unknown",
- "id": "DavidAU/L3-Stheno-v3.2-12.2B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4027945850343755
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4845980190500647
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05060422960725076
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41025
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3345246010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 12.174
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3.1-Dark-Planet-SpinFire-Uncensored-8B/0982d599-57c7-4eeb-bd47-844879bb79a5.json b/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3.1-Dark-Planet-SpinFire-Uncensored-8B/0982d599-57c7-4eeb-bd47-844879bb79a5.json
deleted file mode 100644
index 62dccdf4b20a61949cfd222efaf336e719b2eef1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3.1-Dark-Planet-SpinFire-Uncensored-8B/0982d599-57c7-4eeb-bd47-844879bb79a5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavidAU_L3.1-Dark-Planet-SpinFire-Uncensored-8B/1762652579.542578",
- "retrieved_timestamp": "1762652579.542578",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B",
- "developer": "DavidAU",
- "inference_platform": "unknown",
- "id": "DavidAU/L3.1-Dark-Planet-SpinFire-Uncensored-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7042702252246262
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5260910165037093
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09290030211480363
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.354125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3670212765957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/a7df9a84-fa29-4c8e-8413-4542b5eafb63.json b/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/a7df9a84-fa29-4c8e-8413-4542b5eafb63.json
deleted file mode 100644
index 8c60a12c1eef707aa88f089030ef447aa0f4eeb2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DavidAU/DavidAU_L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/a7df9a84-fa29-4c8e-8413-4542b5eafb63.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavidAU_L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B/1762652579.542795",
- "retrieved_timestamp": "1762652579.5427961",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B",
- "developer": "DavidAU",
- "inference_platform": "unknown",
- "id": "DavidAU/L3.1-MOE-2X8B-Deepseek-DeepHermes-e32-uncensored-abliterated-13.7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3345257250761313
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4420822344441435
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26057401812688824
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37486458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2892287234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 13.668
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Davidsv/Davidsv_SUONG-1/097e6cbe-88cd-4d61-bb4c-0b8ddb537abe.json b/leaderboard_data/HFOpenLLMv2/Davidsv/Davidsv_SUONG-1/097e6cbe-88cd-4d61-bb4c-0b8ddb537abe.json
deleted file mode 100644
index d9d0aaaa6aeeac787216b50add0eef9fc7d772c1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Davidsv/Davidsv_SUONG-1/097e6cbe-88cd-4d61-bb4c-0b8ddb537abe.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Davidsv_SUONG-1/1762652579.5439382",
- "retrieved_timestamp": "1762652579.54394",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Davidsv/SUONG-1",
- "developer": "Davidsv",
- "inference_platform": "unknown",
- "id": "Davidsv/SUONG-1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2497207409673001
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28171339082318814
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24412751677852348
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35775
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1085438829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 2.879
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DavieLion/DavieLion_Lllma-3.2-1B/274ed35b-4abe-4f20-bd18-7e386a7fdaa5.json b/leaderboard_data/HFOpenLLMv2/DavieLion/DavieLion_Lllma-3.2-1B/274ed35b-4abe-4f20-bd18-7e386a7fdaa5.json
deleted file mode 100644
index f49a90f712c907d792bf5007851dd9cd0dee4c1c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DavieLion/DavieLion_Lllma-3.2-1B/274ed35b-4abe-4f20-bd18-7e386a7fdaa5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavieLion_Lllma-3.2-1B/1762652579.5458188",
- "retrieved_timestamp": "1762652579.54582",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavieLion/Lllma-3.2-1B",
- "developer": "DavieLion",
- "inference_platform": "unknown",
- "id": "DavieLion/Lllma-3.2-1B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1601439735457475
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2964692268500723
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.006797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24412751677852348
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35781250000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11261635638297872
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.236
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DebateLabKIT/DebateLabKIT_Llama-3.1-Argunaut-1-8B-SFT/ea40f65f-60a8-4efa-aa8d-e2a64ef5999f.json b/leaderboard_data/HFOpenLLMv2/DebateLabKIT/DebateLabKIT_Llama-3.1-Argunaut-1-8B-SFT/ea40f65f-60a8-4efa-aa8d-e2a64ef5999f.json
deleted file mode 100644
index b2b91ce5cbe886602e468a0947edf2e865a92e40..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DebateLabKIT/DebateLabKIT_Llama-3.1-Argunaut-1-8B-SFT/ea40f65f-60a8-4efa-aa8d-e2a64ef5999f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DebateLabKIT_Llama-3.1-Argunaut-1-8B-SFT/1762652579.546083",
- "retrieved_timestamp": "1762652579.5460842",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT",
- "developer": "DebateLabKIT",
- "inference_platform": "unknown",
- "id": "DebateLabKIT/Llama-3.1-Argunaut-1-8B-SFT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.551921124837653
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48238301936695316
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14501510574018128
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4503020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3472406914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Deci/Deci_DeciLM-7B-instruct/1b3a2041-d14f-44d1-9efd-dbeceaa67ee6.json b/leaderboard_data/HFOpenLLMv2/Deci/Deci_DeciLM-7B-instruct/1b3a2041-d14f-44d1-9efd-dbeceaa67ee6.json
deleted file mode 100644
index e10f9312727e123fb8de6df98d10cf9c9ec5388f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Deci/Deci_DeciLM-7B-instruct/1b3a2041-d14f-44d1-9efd-dbeceaa67ee6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Deci_DeciLM-7B-instruct/1762652579.546672",
- "retrieved_timestamp": "1762652579.546672",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Deci/DeciLM-7B-instruct",
- "developer": "Deci",
- "inference_platform": "unknown",
- "id": "Deci/DeciLM-7B-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4880239985460799
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4589748654047652
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.030211480362537766
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38841666666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26080452127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "DeciLMForCausalLM",
- "params_billions": 7.044
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Deci/Deci_DeciLM-7B/f9d2408b-03dd-4cf8-851e-51a15ff13be9.json b/leaderboard_data/HFOpenLLMv2/Deci/Deci_DeciLM-7B/f9d2408b-03dd-4cf8-851e-51a15ff13be9.json
deleted file mode 100644
index 72de39763a1e08c2dd34b355bcd21d0585336e5d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Deci/Deci_DeciLM-7B/f9d2408b-03dd-4cf8-851e-51a15ff13be9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Deci_DeciLM-7B/1762652579.5463831",
- "retrieved_timestamp": "1762652579.5463839",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Deci/DeciLM-7B",
- "developer": "Deci",
- "inference_platform": "unknown",
- "id": "Deci/DeciLM-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28129474239462404
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44228566674266495
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.028700906344410877
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43585416666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26919880319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "DeciLMForCausalLM",
- "params_billions": 7.044
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DeepAutoAI/DeepAutoAI_d2nwg_Llama-3.1-8B-Instruct-v0.0/d5d73b84-4436-47bf-967e-c9be94898189.json b/leaderboard_data/HFOpenLLMv2/DeepAutoAI/DeepAutoAI_d2nwg_Llama-3.1-8B-Instruct-v0.0/d5d73b84-4436-47bf-967e-c9be94898189.json
deleted file mode 100644
index f6f1d86a600c4a3c3acafb3b343b4cf888bcc431..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DeepAutoAI/DeepAutoAI_d2nwg_Llama-3.1-8B-Instruct-v0.0/d5d73b84-4436-47bf-967e-c9be94898189.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DeepAutoAI_d2nwg_Llama-3.1-8B-Instruct-v0.0/1762652579.548984",
- "retrieved_timestamp": "1762652579.548985",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0",
- "developer": "DeepAutoAI",
- "inference_platform": "unknown",
- "id": "DeepAutoAI/d2nwg_Llama-3.1-8B-Instruct-v0.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7892746800711002
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5080411642065981
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18051359516616314
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41346875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3877160904255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DeepAutoAI/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.0/fb8eb882-26a9-4008-9226-90d44d38b54f.json b/leaderboard_data/HFOpenLLMv2/DeepAutoAI/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.0/fb8eb882-26a9-4008-9226-90d44d38b54f.json
deleted file mode 100644
index ee036c49c9d75ce1e94bd8e5daa09c36caccf8be..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DeepAutoAI/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.0/fb8eb882-26a9-4008-9226-90d44d38b54f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.0/1762652579.5500422",
- "retrieved_timestamp": "1762652579.5500429",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0",
- "developer": "DeepAutoAI",
- "inference_platform": "unknown",
- "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7889499860370484
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5125175335277464
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19184290030211482
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41213541666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38954454787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DeepAutoAI/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.1/a7ba1534-464f-45ba-834f-5f501b155c20.json b/leaderboard_data/HFOpenLLMv2/DeepAutoAI/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.1/a7ba1534-464f-45ba-834f-5f501b155c20.json
deleted file mode 100644
index 3f234812361c46cdbf717d9e98f693008a4a8555..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DeepAutoAI/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.1/a7ba1534-464f-45ba-834f-5f501b155c20.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DeepAutoAI_ldm_soup_Llama-3.1-8B-Instruct-v0.1/1762652579.550273",
- "retrieved_timestamp": "1762652579.5502741",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1",
- "developer": "DeepAutoAI",
- "inference_platform": "unknown",
- "id": "DeepAutoAI/ldm_soup_Llama-3.1-8B-Instruct-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7889499860370484
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5125175335277464
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19184290030211482
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41213541666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38954454787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Lexora-Lite-3B/5eb28bbd-8428-4385-b078-13e8a868e9f0.json b/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Lexora-Lite-3B/5eb28bbd-8428-4385-b078-13e8a868e9f0.json
deleted file mode 100644
index 3fe0eddfe8e698793350f7d097c3a0aebde79a44..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Lexora-Lite-3B/5eb28bbd-8428-4385-b078-13e8a868e9f0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DeepMount00_Lexora-Lite-3B/1762652579.550504",
- "retrieved_timestamp": "1762652579.550505",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DeepMount00/Lexora-Lite-3B",
- "developer": "DeepMount00",
- "inference_platform": "unknown",
- "id": "DeepMount00/Lexora-Lite-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5775996577968678
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4873392373334518
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23036253776435045
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39660416666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3602061170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Lexora-Lite-3B_v2/bf38278f-6375-41a6-9744-04fb4a32ed72.json b/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Lexora-Lite-3B_v2/bf38278f-6375-41a6-9744-04fb4a32ed72.json
deleted file mode 100644
index aa9a7d93c9286d227da45d3674b474e44a206ab9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Lexora-Lite-3B_v2/bf38278f-6375-41a6-9744-04fb4a32ed72.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DeepMount00_Lexora-Lite-3B_v2/1762652579.550789",
- "retrieved_timestamp": "1762652579.550789",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DeepMount00/Lexora-Lite-3B_v2",
- "developer": "DeepMount00",
- "inference_platform": "unknown",
- "id": "DeepMount00/Lexora-Lite-3B_v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49431840848947456
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48117654754683153
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2280966767371601
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38215625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35438829787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Lexora-Medium-7B/cc8f594a-e2f7-49e3-8654-57f1b397797f.json b/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Lexora-Medium-7B/cc8f594a-e2f7-49e3-8654-57f1b397797f.json
deleted file mode 100644
index 290a6767699d2bf141a2f1fbc6a38a470e15af5d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Lexora-Medium-7B/cc8f594a-e2f7-49e3-8654-57f1b397797f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DeepMount00_Lexora-Medium-7B/1762652579.551008",
- "retrieved_timestamp": "1762652579.551009",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DeepMount00/Lexora-Medium-7B",
- "developer": "DeepMount00",
- "inference_platform": "unknown",
- "id": "DeepMount00/Lexora-Medium-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4103379034295669
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5144844494250328
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22205438066465258
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44394791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43251329787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Qwen2.5-7B-Instruct-MathCoder/ea1a36fb-66c0-4b1a-bdac-7ec2602a7c65.json b/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Qwen2.5-7B-Instruct-MathCoder/ea1a36fb-66c0-4b1a-bdac-7ec2602a7c65.json
deleted file mode 100644
index c97063c1bec61605b530e86b10955d7ed580d98b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_Qwen2.5-7B-Instruct-MathCoder/ea1a36fb-66c0-4b1a-bdac-7ec2602a7c65.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2.5-7B-Instruct-MathCoder/1762652579.55323",
- "retrieved_timestamp": "1762652579.553231",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DeepMount00/Qwen2.5-7B-Instruct-MathCoder",
- "developer": "DeepMount00",
- "inference_platform": "unknown",
- "id": "DeepMount00/Qwen2.5-7B-Instruct-MathCoder"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15302508455342934
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2998444769655102
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0007552870090634441
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3806354166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11178523936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_mergekit-ties-okvgjfz/34350829-d42d-4e67-b23f-171044428c1f.json b/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_mergekit-ties-okvgjfz/34350829-d42d-4e67-b23f-171044428c1f.json
deleted file mode 100644
index 94e14b85d87033ded181f096b4cb4d5f9dd8dfde..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DeepMount00/DeepMount00_mergekit-ties-okvgjfz/34350829-d42d-4e67-b23f-171044428c1f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DeepMount00_mergekit-ties-okvgjfz/1762652579.5535848",
- "retrieved_timestamp": "1762652579.553586",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DeepMount00/mergekit-ties-okvgjfz",
- "developer": "DeepMount00",
- "inference_platform": "unknown",
- "id": "DeepMount00/mergekit-ties-okvgjfz"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15302508455342934
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2998444769655102
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0007552870090634441
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3806354166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11178523936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
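Every file deleted in the hunks above and below shares one record shape: a top-level object with "schema_version", "evaluation_id", "model_info", and an "evaluation_results" list whose entries pair a "metric_config" (with "min_score"/"max_score" bounds) with a "score_details.score". As a minimal sketch only, not code from this repository, the following shows how such a record could be sanity-checked before removal or conversion; the key names and bounds are taken from the deleted files themselves, while the function name and the directory glob in the usage comment are hypothetical.

import json
from pathlib import Path

def check_record(path: Path) -> list[str]:
    """Return a list of problems found in one evaluation-record JSON."""
    problems = []
    data = json.loads(path.read_text())
    # Top-level keys observed in every deleted record above.
    for key in ("schema_version", "evaluation_id", "model_info", "evaluation_results"):
        if key not in data:
            problems.append(f"missing top-level key: {key}")
    # Each result carries its own score bounds in metric_config.
    for res in data.get("evaluation_results", []):
        cfg = res.get("metric_config", {})
        score = res.get("score_details", {}).get("score")
        lo, hi = cfg.get("min_score", 0), cfg.get("max_score", 1)
        if score is None:
            problems.append(f"{res.get('evaluation_name')}: no score")
        elif not lo <= score <= hi:
            problems.append(f"{res.get('evaluation_name')}: score {score} outside [{lo}, {hi}]")
    return problems

# Hypothetical usage over the directory layout used in these hunks:
# for p in Path("leaderboard_data/HFOpenLLMv2").rglob("*.json"):
#     for msg in check_record(p):
#         print(p, msg)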
diff --git a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Baldur-8B/6267c5c6-abd3-4eb0-94ca-5c569414e7a9.json b/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Baldur-8B/6267c5c6-abd3-4eb0-94ca-5c569414e7a9.json
deleted file mode 100644
index fd3a3c053ddb61bac42892d36dcb302cf424ac2a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Baldur-8B/6267c5c6-abd3-4eb0-94ca-5c569414e7a9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Delta-Vector_Baldur-8B/1762652579.5538838",
- "retrieved_timestamp": "1762652579.553885",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Delta-Vector/Baldur-8B",
- "developer": "Delta-Vector",
- "inference_platform": "unknown",
- "id": "Delta-Vector/Baldur-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47818233398493776
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5305842954529679
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14350453172205438
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43715624999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3654421542553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Control-8B-V1.1/20796a87-8691-44b9-9b60-85ad3c7f4b7b.json b/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Control-8B-V1.1/20796a87-8691-44b9-9b60-85ad3c7f4b7b.json
deleted file mode 100644
index a312468cb7d9076264dc84a7dcc0d0876e67ebf8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Control-8B-V1.1/20796a87-8691-44b9-9b60-85ad3c7f4b7b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Delta-Vector_Control-8B-V1.1/1762652579.5543838",
- "retrieved_timestamp": "1762652579.554385",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Delta-Vector/Control-8B-V1.1",
- "developer": "Delta-Vector",
- "inference_platform": "unknown",
- "id": "Delta-Vector/Control-8B-V1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5696562897556262
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49928406748541837
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12764350453172205
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42372916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37450132978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Control-8B/26dc4843-56a7-45b5-a61a-386e260574a2.json b/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Control-8B/26dc4843-56a7-45b5-a61a-386e260574a2.json
deleted file mode 100644
index 007a2da14d07b351b27e03b573c235d5a4c4df78..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Control-8B/26dc4843-56a7-45b5-a61a-386e260574a2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Delta-Vector_Control-8B/1762652579.554166",
- "retrieved_timestamp": "1762652579.554166",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Delta-Vector/Control-8B",
- "developer": "Delta-Vector",
- "inference_platform": "unknown",
- "id": "Delta-Vector/Control-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5489733906035985
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5041458754993735
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13897280966767372
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43554166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3731715425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Darkens-8B/a1689935-8ccb-49a8-8c2a-8dbf32b7ac02.json b/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Darkens-8B/a1689935-8ccb-49a8-8c2a-8dbf32b7ac02.json
deleted file mode 100644
index 49c77e5d39d5d676bf0df8a11453db078859104d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Darkens-8B/a1689935-8ccb-49a8-8c2a-8dbf32b7ac02.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Delta-Vector_Darkens-8B/1762652579.5545971",
- "retrieved_timestamp": "1762652579.5545971",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Delta-Vector/Darkens-8B",
- "developer": "Delta-Vector",
- "inference_platform": "unknown",
- "id": "Delta-Vector/Darkens-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25476624245889795
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5250590567372793
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4105520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3735871010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 8.414
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Henbane-7b-attempt2/73f9a017-15ac-42e6-9600-69b411de4086.json b/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Henbane-7b-attempt2/73f9a017-15ac-42e6-9600-69b411de4086.json
deleted file mode 100644
index a137c4b605875b3ce3b8562c979c54b9af373e70..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Henbane-7b-attempt2/73f9a017-15ac-42e6-9600-69b411de4086.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Delta-Vector_Henbane-7b-attempt2/1762652579.55481",
- "retrieved_timestamp": "1762652579.55481",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Delta-Vector/Henbane-7b-attempt2",
- "developer": "Delta-Vector",
- "inference_platform": "unknown",
- "id": "Delta-Vector/Henbane-7b-attempt2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4157335868828043
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5061177974093075
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22734138972809667
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39734375000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4027593085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Odin-9B/586d4e20-c1f4-466a-8488-07ac18ad6253.json b/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Odin-9B/586d4e20-c1f4-466a-8488-07ac18ad6253.json
deleted file mode 100644
index 9d3f2912a311f456826ad4d1baf7c0c02c19b943..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Odin-9B/586d4e20-c1f4-466a-8488-07ac18ad6253.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Delta-Vector_Odin-9B/1762652579.555037",
- "retrieved_timestamp": "1762652579.555038",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Delta-Vector/Odin-9B",
- "developer": "Delta-Vector",
- "inference_platform": "unknown",
- "id": "Delta-Vector/Odin-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3691970637907419
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5440253444823155
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14501510574018128
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3414429530201342
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46478125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4046708776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Tor-8B/ce7e8e58-e323-4704-b6f3-7fa6c5c3b7f2.json b/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Tor-8B/ce7e8e58-e323-4704-b6f3-7fa6c5c3b7f2.json
deleted file mode 100644
index 007c691daa47e3cec19dc634ca60e544995d6201..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Delta-Vector/Delta-Vector_Tor-8B/ce7e8e58-e323-4704-b6f3-7fa6c5c3b7f2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Delta-Vector_Tor-8B/1762652579.555239",
- "retrieved_timestamp": "1762652579.55524",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Delta-Vector/Tor-8B",
- "developer": "Delta-Vector",
- "inference_platform": "unknown",
- "id": "Delta-Vector/Tor-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23815476269631244
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5209108776928992
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40921874999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37300531914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 8.414
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_L3-8B-R1-WolfCore-V1.5-test/3c4058cd-238b-4b01-870d-8693f5ce1b8f.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_L3-8B-R1-WolfCore-V1.5-test/3c4058cd-238b-4b01-870d-8693f5ce1b8f.json
deleted file mode 100644
index 0a947bb7691874fe235bcf9d8a2afe35e827532b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_L3-8B-R1-WolfCore-V1.5-test/3c4058cd-238b-4b01-870d-8693f5ce1b8f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_L3-8B-R1-WolfCore-V1.5-test/1762652579.556192",
- "retrieved_timestamp": "1762652579.556193",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/L3-8B-R1-WolfCore-V1.5-test"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3955006050612375
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5314954163679548
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12311178247734139
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3263422818791946
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3840729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37275598404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_L3-8B-R1-WolfCore/6d8d63c0-ad69-4224-8250-b1664f6abbcf.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_L3-8B-R1-WolfCore/6d8d63c0-ad69-4224-8250-b1664f6abbcf.json
deleted file mode 100644
index 6815336b17d5cdbe289b9facf0f1df7624782c10..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_L3-8B-R1-WolfCore/6d8d63c0-ad69-4224-8250-b1664f6abbcf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_L3-8B-R1-WolfCore/1762652579.555949",
- "retrieved_timestamp": "1762652579.5559502",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/L3-8B-R1-WolfCore",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/L3-8B-R1-WolfCore"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3775404814780339
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.531794652653343
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16314199395770393
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3288590604026846
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42766666666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3716755319148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_L3-8B-WolfCore/c6771d5c-acaf-4b17-96b4-abf3b75bc68f.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_L3-8B-WolfCore/c6771d5c-acaf-4b17-96b4-abf3b75bc68f.json
deleted file mode 100644
index 30ac21a40fece211d1af6c3b206e226251d67ad4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_L3-8B-WolfCore/c6771d5c-acaf-4b17-96b4-abf3b75bc68f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_L3-8B-WolfCore/1762652579.556399",
- "retrieved_timestamp": "1762652579.5564",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/L3-8B-WolfCore",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/L3-8B-WolfCore"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4021950646506824
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5181980783946081
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09818731117824774
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39728125000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3705119680851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-FoxFrame-test/ef5bb4eb-0875-4cc5-8e27-b59ffbd2e477.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-FoxFrame-test/ef5bb4eb-0875-4cc5-8e27-b59ffbd2e477.json
deleted file mode 100644
index cee948dd589b557fd64ebc1e6a3ebbaa1cc5bfd1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-FoxFrame-test/ef5bb4eb-0875-4cc5-8e27-b59ffbd2e477.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-FoxFrame-test/1762652579.556618",
- "retrieved_timestamp": "1762652579.556619",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-FoxFrame-test",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-FoxFrame-test"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42220308780701876
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5456376527271466
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13972809667673716
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42540625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3503158244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-FoxFrame2-test/e46698de-8b2d-4b3c-b482-8cc8a3665eac.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-FoxFrame2-test/e46698de-8b2d-4b3c-b482-8cc8a3665eac.json
deleted file mode 100644
index 0d5b8cc2986d70699324927f798b6c55d2db7309..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-FoxFrame2-test/e46698de-8b2d-4b3c-b482-8cc8a3665eac.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-FoxFrame2-test/1762652579.556837",
- "retrieved_timestamp": "1762652579.5568378",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-FoxFrame2-test",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-FoxFrame2-test"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43189514931492884
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5484795753806021
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1404833836858006
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145973154362416
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4251875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3568816489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-FoxFrame3-test/35351894-ea9d-456b-ab9a-c98686948e6b.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-FoxFrame3-test/35351894-ea9d-456b-ab9a-c98686948e6b.json
deleted file mode 100644
index a4af875561b618722454ef9fb63dbabc16021a38..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-FoxFrame3-test/35351894-ea9d-456b-ab9a-c98686948e6b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-FoxFrame3-test/1762652579.557049",
- "retrieved_timestamp": "1762652579.5570502",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-FoxFrame3-test",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-FoxFrame3-test"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43231957871780213
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5394764281718397
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13217522658610273
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45976041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35289228723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Kakigori/2f19082b-8377-4f63-8c5f-1aa25071a240.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Kakigori/2f19082b-8377-4f63-8c5f-1aa25071a240.json
deleted file mode 100644
index 049afa2cc622669abfba973f9ef81358a191f9b7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Kakigori/2f19082b-8377-4f63-8c5f-1aa25071a240.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Kakigori/1762652579.5572648",
- "retrieved_timestamp": "1762652579.557266",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-Kakigori",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-Kakigori"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.359329911302012
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5415529337961275
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11933534743202417
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40521875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3581283244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame-Experiment-2/630c100f-c88d-42a7-9614-bd9a958eab2b.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame-Experiment-2/630c100f-c88d-42a7-9614-bd9a958eab2b.json
deleted file mode 100644
index b8b4afe65d8ceb288e8dc45eee8582e7a886b2e1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame-Experiment-2/630c100f-c88d-42a7-9614-bd9a958eab2b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame-Experiment-2/1762652579.5578592",
- "retrieved_timestamp": "1762652579.5578601",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-LilithFrame-Experiment-2",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4299469851106176
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4982672766561394
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10725075528700906
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32550335570469796
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3804479166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32762632978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame-Experiment-3/37292ca7-9e82-4c80-bc6e-bc7e1be7a95e.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame-Experiment-3/37292ca7-9e82-4c80-bc6e-bc7e1be7a95e.json
deleted file mode 100644
index bea82e52b33f91536072628d4b7044d3f7ab64c6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame-Experiment-3/37292ca7-9e82-4c80-bc6e-bc7e1be7a95e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame-Experiment-3/1762652579.558079",
- "retrieved_timestamp": "1762652579.558079",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-LilithFrame-Experiment-3",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4127858526487498
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5468080647121653
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13444108761329304
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4038541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3603723404255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame-Experiment-4/ecc18f9c-c495-4ae6-8fd8-b2f84fb453ac.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame-Experiment-4/ecc18f9c-c495-4ae6-8fd8-b2f84fb453ac.json
deleted file mode 100644
index 4249b365d0d2543cce74f07f1c74cff306774184..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame-Experiment-4/ecc18f9c-c495-4ae6-8fd8-b2f84fb453ac.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame-Experiment-4/1762652579.5582879",
- "retrieved_timestamp": "1762652579.5582888",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-LilithFrame-Experiment-4",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-LilithFrame-Experiment-4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3981480250180632
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5534370722864824
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12235649546827794
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31711409395973156
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43706249999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3648603723404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame/3d68e2fb-06cc-43b9-830b-f1cd02f12166.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame/3d68e2fb-06cc-43b9-830b-f1cd02f12166.json
deleted file mode 100644
index 88de0d3743e59c9de9ac00411b8e139578fa6bcf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame/3d68e2fb-06cc-43b9-830b-f1cd02f12166.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame/1762652579.557674",
- "retrieved_timestamp": "1762652579.5576751",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-LilithFrame",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-LilithFrame"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43604192431636946
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4956125598349656
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3842604166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32372007978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame/a04a8775-8b4d-4608-9692-47af9f7ed5a7.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame/a04a8775-8b4d-4608-9692-47af9f7ed5a7.json
deleted file mode 100644
index 3d42198fe516d706ac40694d629f216274e4c5dc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-LilithFrame/a04a8775-8b4d-4608-9692-47af9f7ed5a7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-LilithFrame/1762652579.557468",
- "retrieved_timestamp": "1762652579.557469",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-LilithFrame",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-LilithFrame"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4509545782966972
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4944264226434414
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11555891238670694
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3895625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3256316489361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-GreenSnake/9b9eb072-4120-4a6a-a565-27136e617f10.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-GreenSnake/9b9eb072-4120-4a6a-a565-27136e617f10.json
deleted file mode 100644
index f2718c16e9ad6aebf9aae47635f4521873c329c2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-GreenSnake/9b9eb072-4120-4a6a-a565-27136e617f10.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-GreenSnake/1762652579.5585039",
- "retrieved_timestamp": "1762652579.558505",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-Mimicore-GreenSnake",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-Mimicore-GreenSnake"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47800724300411795
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5480509710089697
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13897280966767372
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4305833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3651097074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Nocturne/6a21892f-1d11-4c59-8894-8800822b2e72.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Nocturne/6a21892f-1d11-4c59-8894-8800822b2e72.json
deleted file mode 100644
index 2dfeac638713f216d4a35ba6b749e63dfea922f7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Nocturne/6a21892f-1d11-4c59-8894-8800822b2e72.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Nocturne/1762652579.558723",
- "retrieved_timestamp": "1762652579.5587242",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-Mimicore-Nocturne",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-Mimicore-Nocturne"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3956502081144696
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5703329773483826
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10574018126888217
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45690625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36336436170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi-v2-Experiment/db8eedcc-1dcf-47af-9c2b-a72da97146ca.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi-v2-Experiment/db8eedcc-1dcf-47af-9c2b-a72da97146ca.json
deleted file mode 100644
index 5d3debf648e24a82df4ca8d7a5a7916045d19029..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi-v2-Experiment/db8eedcc-1dcf-47af-9c2b-a72da97146ca.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi-v2-Experiment/1762652579.5591779",
- "retrieved_timestamp": "1762652579.559179",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v2-Experiment"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2842413684579139
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5322525988273211
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06117824773413897
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45737500000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3423371010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi-v3-Experiment/8198ab16-4a8b-4da9-8e8a-d1e3beb02839.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi-v3-Experiment/8198ab16-4a8b-4da9-8e8a-d1e3beb02839.json
deleted file mode 100644
index cd4f535d75b92a3a6d9c00dde50e7e46a250c3ee..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi-v3-Experiment/8198ab16-4a8b-4da9-8e8a-d1e3beb02839.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi-v3-Experiment/1762652579.559391",
- "retrieved_timestamp": "1762652579.559392",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v3-Experiment"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4101628124487471
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5437817873983797
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1216012084592145
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44379166666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.339594414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi-v4-Experiment/e4e71999-6f83-4745-8a9d-66e711e39ac3.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi-v4-Experiment/e4e71999-6f83-4745-8a9d-66e711e39ac3.json
deleted file mode 100644
index 3b8dc74a58b611c19404f3a5c279032bd0a2a098..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi-v4-Experiment/e4e71999-6f83-4745-8a9d-66e711e39ac3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi-v4-Experiment/1762652579.559606",
- "retrieved_timestamp": "1762652579.559606",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-Mimicore-Orochi-v4-Experiment"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4320702402957486
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5462502212045214
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4449375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3519780585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi/f1bfef73-3586-4f9d-80ca-71b0fb00aadd.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi/f1bfef73-3586-4f9d-80ca-71b0fb00aadd.json
deleted file mode 100644
index 2aa943f1db42b1b15ad15293eb6439be51010d8f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-Orochi/f1bfef73-3586-4f9d-80ca-71b0fb00aadd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-Orochi/1762652579.558937",
- "retrieved_timestamp": "1762652579.558938",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-Mimicore-Orochi",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-Mimicore-Orochi"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4620451513096362
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.54977394640115
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13595166163141995
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45458333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34466422872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/aa2478d9-59bd-458b-abee-5669aa6280df.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/aa2478d9-59bd-458b-abee-5669aa6280df.json
deleted file mode 100644
index 874d0e54c3fee5e2a7c7d62c9d313e2d03a9b989..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/aa2478d9-59bd-458b-abee-5669aa6280df.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-1/1762652579.5600362",
- "retrieved_timestamp": "1762652579.5600362",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39090391272933595
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48656395204478037
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07854984894259819
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3789583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31141954787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/66bd7a21-6f85-49b5-bc01-3f52ed8d1c64.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/66bd7a21-6f85-49b5-bc01-3f52ed8d1c64.json
deleted file mode 100644
index 80b632ad0bca748f2de4ca209bdb40e9a5e60bcf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/66bd7a21-6f85-49b5-bc01-3f52ed8d1c64.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-2/1762652579.560246",
- "retrieved_timestamp": "1762652579.560246",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31239333856389934
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5126398500939828
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11253776435045318
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39746875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33136635638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/1a3eefa6-7b3d-4541-93b0-8fe86f6bf038.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/1a3eefa6-7b3d-4541-93b0-8fe86f6bf038.json
deleted file mode 100644
index bc70dc1d4b40f3cec93db77fe56b0313f1209695..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/1a3eefa6-7b3d-4541-93b0-8fe86f6bf038.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-3/1762652579.56046",
- "retrieved_timestamp": "1762652579.560461",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4302218114602588
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4811798810475259
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08987915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3684166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31981382978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/d7303703-f33e-430b-813d-998c95dbdb67.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/d7303703-f33e-430b-813d-998c95dbdb67.json
deleted file mode 100644
index 9c4e261190f62843b87ab0aa12243a05886065a8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/d7303703-f33e-430b-813d-998c95dbdb67.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake-v2-Experiment-4/1762652579.560668",
- "retrieved_timestamp": "1762652579.560668",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake-v2-Experiment-4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42405151664250856
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5184748714407336
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11404833836858005
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40019791666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3341921542553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake/8aa34df4-8347-4f2d-98a0-7ec58bd62e43.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake/8aa34df4-8347-4f2d-98a0-7ec58bd62e43.json
deleted file mode 100644
index 4fc07e3479343a0cff8766fb57d4ecc76dc52899..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Mimicore-WhiteSnake/8aa34df4-8347-4f2d-98a0-7ec58bd62e43.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Mimicore-WhiteSnake/1762652579.55982",
- "retrieved_timestamp": "1762652579.5598211",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-Mimicore-WhiteSnake"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44376033369238066
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5604605871844869
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13141993957703926
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179530201342282
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.456875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3657746010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Unleashed-Twilight/00f0fe96-4a06-46e7-88d8-368b86bcdb06.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Unleashed-Twilight/00f0fe96-4a06-46e7-88d8-368b86bcdb06.json
deleted file mode 100644
index fb25db5473451ed62411602ba9faf50092b5ddbf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-Unleashed-Twilight/00f0fe96-4a06-46e7-88d8-368b86bcdb06.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-Unleashed-Twilight/1762652579.560919",
- "retrieved_timestamp": "1762652579.56092",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-Unleashed-Twilight",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-Unleashed-Twilight"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3505121965274361
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5520627163174447
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09592145015105741
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3288590604026846
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4383958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3677692819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-WolFrame/3bb96e7a-6c09-4b9e-8f2b-0b525c2ebeb3.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-WolFrame/3bb96e7a-6c09-4b9e-8f2b-0b525c2ebeb3.json
deleted file mode 100644
index 76cd34d944e3458ed4a1ca14b2f065c9e080720b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MN-12B-WolFrame/3bb96e7a-6c09-4b9e-8f2b-0b525c2ebeb3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MN-12B-WolFrame/1762652579.5611808",
- "retrieved_timestamp": "1762652579.561182",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MN-12B-WolFrame",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MN-12B-WolFrame"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4397387819873491
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.511681287565329
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13141993957703926
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40146875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33934507978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-test/e6031abf-1ae2-431c-8247-3124fff41d17.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-test/e6031abf-1ae2-431c-8247-3124fff41d17.json
deleted file mode 100644
index c562fc9b2701b9550518b8b0cdb04b6a653abddc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-test/e6031abf-1ae2-431c-8247-3124fff41d17.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-test/1762652579.5616372",
- "retrieved_timestamp": "1762652579.5616379",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MiniusLight-24B-test",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MiniusLight-24B-test"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03936776641533354
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6333927323374534
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0256797583081571
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36828859060402686
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40925000000000006
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5182014627659575
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 23.572
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-v1b-test/2917ef74-c8cb-4255-8bda-76280fbe7c64.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-v1b-test/2917ef74-c8cb-4255-8bda-76280fbe7c64.json
deleted file mode 100644
index 7c4010756499bd81818875a5dcec5147abdab087..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-v1b-test/2917ef74-c8cb-4255-8bda-76280fbe7c64.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-v1b-test/1762652579.561931",
- "retrieved_timestamp": "1762652579.561932",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MiniusLight-24B-v1b-test",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MiniusLight-24B-v1b-test"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37911408396388246
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6617145681113757
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2394259818731118
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37919463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4557291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5364860372340425
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 23.572
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-v1c-test/23a21492-0897-44b4-a046-cf93fa8c2a64.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-v1c-test/23a21492-0897-44b4-a046-cf93fa8c2a64.json
deleted file mode 100644
index 2fc6893347505f4eef150ad043c2d4b7440a7b2b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-v1c-test/23a21492-0897-44b4-a046-cf93fa8c2a64.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-v1c-test/1762652579.562173",
- "retrieved_timestamp": "1762652579.5621738",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MiniusLight-24B-v1c-test",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MiniusLight-24B-v1c-test"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37858881102142317
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6752681657268389
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29682779456193353
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3951342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46341666666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5487034574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 23.572
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-v1d-test/af67712e-7436-4703-ac22-9878dd8e190a.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-v1d-test/af67712e-7436-4703-ac22-9878dd8e190a.json
deleted file mode 100644
index 9e2cb14290f8c59da4c302c35c1815be6e612bbc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B-v1d-test/af67712e-7436-4703-ac22-9878dd8e190a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B-v1d-test/1762652579.5624058",
- "retrieved_timestamp": "1762652579.5624058",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MiniusLight-24B-v1d-test",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MiniusLight-24B-v1d-test"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40324339419407174
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6712025325276962
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2945619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3951342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46208333333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5488696808510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 23.572
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B/2ec36e2e-0fba-4c6a-b9d0-fe57e7d708ef.json b/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B/2ec36e2e-0fba-4c6a-b9d0-fe57e7d708ef.json
deleted file mode 100644
index 3780b3adc0feaeb4e6046946826c1262b211154e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DoppelReflEx/DoppelReflEx_MiniusLight-24B/2ec36e2e-0fba-4c6a-b9d0-fe57e7d708ef.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DoppelReflEx_MiniusLight-24B/1762652579.561418",
- "retrieved_timestamp": "1762652579.561419",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DoppelReflEx/MiniusLight-24B",
- "developer": "DoppelReflEx",
- "inference_platform": "unknown",
- "id": "DoppelReflEx/MiniusLight-24B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25766410900854175
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6256461050033514
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12613293051359517
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35822147651006714
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43191666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5091422872340425
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 23.572
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Again-8B-Model_Stock/cd2de45f-874a-4d63-bb6d-0afe5e687964.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Again-8B-Model_Stock/cd2de45f-874a-4d63-bb6d-0afe5e687964.json
deleted file mode 100644
index 945bbd224af8b5c35947455381a3f6ce89b0b579..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Again-8B-Model_Stock/cd2de45f-874a-4d63-bb6d-0afe5e687964.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Again-8B-Model_Stock/1762652579.562616",
- "retrieved_timestamp": "1762652579.562617",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Again-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Again-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6724213974476612
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5309801059970912
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12009063444108761
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39867708333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.351811835106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 4.015
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Alita99-8B-LINEAR/570c991f-06bc-45d1-8409-d779a07df9a6.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Alita99-8B-LINEAR/570c991f-06bc-45d1-8409-d779a07df9a6.json
deleted file mode 100644
index 9f47b0d5235076eeb434885fdeb6ead1f8bcf976..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Alita99-8B-LINEAR/570c991f-06bc-45d1-8409-d779a07df9a6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Alita99-8B-LINEAR/1762652579.562879",
- "retrieved_timestamp": "1762652579.56288",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Alita99-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Alita99-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7190077882241341
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5441767095577089
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42664583333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38090093085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_AnotherTest/81ec7c1a-8874-44c3-b482-8a8ecfb2ae72.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_AnotherTest/81ec7c1a-8874-44c3-b482-8a8ecfb2ae72.json
deleted file mode 100644
index 0a105c7685c453ea021c2825a593c44b8db149c8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_AnotherTest/81ec7c1a-8874-44c3-b482-8a8ecfb2ae72.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_AnotherTest/1762652579.563089",
- "retrieved_timestamp": "1762652579.563089",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/AnotherTest",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/AnotherTest"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47006387496287627
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46834113564549334
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.061933534743202415
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42128125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2874833776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire-8B-model_stock/28bd44a9-d916-4a0b-b0ae-c6a4cb5d727d.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire-8B-model_stock/28bd44a9-d916-4a0b-b0ae-c6a4cb5d727d.json
deleted file mode 100644
index fce22b64236780a53835c758e78beb9c97316242..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire-8B-model_stock/28bd44a9-d916-4a0b-b0ae-c6a4cb5d727d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire-8B-model_stock/1762652579.5633001",
- "retrieved_timestamp": "1762652579.563301",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Aspire-8B-model_stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Aspire-8B-model_stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7140620221013578
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5278251846388996
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14954682779456194
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145973154362416
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42124999999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37632978723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_1.3-8B_model-stock/917a9361-af08-4e12-a93a-01321629b31f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_1.3-8B_model-stock/917a9361-af08-4e12-a93a-01321629b31f.json
deleted file mode 100644
index fb1ba83656cab470ba9deecb728a192dba4710dc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_1.3-8B_model-stock/917a9361-af08-4e12-a93a-01321629b31f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_1.3-8B_model-stock/1762652579.563606",
- "retrieved_timestamp": "1762652579.563607",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Aspire_1.3-8B_model-stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Aspire_1.3-8B_model-stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7061685217445268
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5301644606574212
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1691842900302115
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4104583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37159242021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2-8B-Model_Stock/677221cd-f218-4982-8363-d969913d7a22.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2-8B-Model_Stock/677221cd-f218-4982-8363-d969913d7a22.json
deleted file mode 100644
index d0a25b481c292f4e780742776357a7e9e6829400..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2-8B-Model_Stock/677221cd-f218-4982-8363-d969913d7a22.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2-8B-Model_Stock/1762652579.56384",
- "retrieved_timestamp": "1762652579.563841",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Aspire_V2-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Aspire_V2-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7371430027881576
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5329650089428358
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17598187311178248
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38937499999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3696808510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2.1-8B-Model_Stock/292e77cb-e6e6-4d10-9956-1e09369e9669.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2.1-8B-Model_Stock/292e77cb-e6e6-4d10-9956-1e09369e9669.json
deleted file mode 100644
index d34c6c4775aca9fecf4bae4713e63147298c9601..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2.1-8B-Model_Stock/292e77cb-e6e6-4d10-9956-1e09369e9669.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2.1-8B-Model_Stock/1762652579.564126",
- "retrieved_timestamp": "1762652579.564127",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Aspire_V2.1-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Aspire_V2.1-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7237540836092679
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5236395810818485
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17673716012084592
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41359375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3800698138297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2_ALT-8B-Model_Stock/62414bde-98c1-4cae-af6d-18d3b0ecd50a.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2_ALT-8B-Model_Stock/62414bde-98c1-4cae-af6d-18d3b0ecd50a.json
deleted file mode 100644
index 994d8bc3d1005c7c1499498109e0a346e9f4dbc4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2_ALT-8B-Model_Stock/62414bde-98c1-4cae-af6d-18d3b0ecd50a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2_ALT-8B-Model_Stock/1762652579.5643399",
- "retrieved_timestamp": "1762652579.564341",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Aspire_V2_ALT-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Aspire_V2_ALT-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7381170848903134
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5265819478728287
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1729607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39749999999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3726728723404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2_ALT_ROW-8B-Model_Stock/3258c5c6-d12d-4e09-8404-22b6aaf82e87.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2_ALT_ROW-8B-Model_Stock/3258c5c6-d12d-4e09-8404-22b6aaf82e87.json
deleted file mode 100644
index c85eb1266fa02ac44db15e54c89e4511bfdb0fc6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V2_ALT_ROW-8B-Model_Stock/3258c5c6-d12d-4e09-8404-22b6aaf82e87.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V2_ALT_ROW-8B-Model_Stock/1762652579.564561",
- "retrieved_timestamp": "1762652579.5645618",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Aspire_V2_ALT_ROW-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7381170848903134
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5265819478728287
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1729607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39749999999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3726728723404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V3-8B-Model_Stock/3cc8c02f-87a8-428a-8991-a0d52500d927.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V3-8B-Model_Stock/3cc8c02f-87a8-428a-8991-a0d52500d927.json
deleted file mode 100644
index d37945534ef9bf1126d4a389a15562304a96eaf0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V3-8B-Model_Stock/3cc8c02f-87a8-428a-8991-a0d52500d927.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V3-8B-Model_Stock/1762652579.5648441",
- "retrieved_timestamp": "1762652579.564845",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Aspire_V3-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Aspire_V3-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5118795905973927
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5267958758971987
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18580060422960726
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40149999999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36419547872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V4-8B-Model_Stock/692e0ff5-0607-4aae-8996-45bbbc4d2288.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V4-8B-Model_Stock/692e0ff5-0607-4aae-8996-45bbbc4d2288.json
deleted file mode 100644
index 7252a1a00643d12772e03cd31bd442c5039c546f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V4-8B-Model_Stock/692e0ff5-0607-4aae-8996-45bbbc4d2288.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V4-8B-Model_Stock/1762652579.565063",
- "retrieved_timestamp": "1762652579.565064",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Aspire_V4-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Aspire_V4-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.769416259967996
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5314037161536506
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19259818731117825
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3867395833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.370844414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V4_ALT-8B-Model_Stock/7b634b21-8d89-4656-89d7-3590fc8a883a.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V4_ALT-8B-Model_Stock/7b634b21-8d89-4656-89d7-3590fc8a883a.json
deleted file mode 100644
index 206d588e5e5cc71ed60ceb66d41045fcac3aedf8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aspire_V4_ALT-8B-Model_Stock/7b634b21-8d89-4656-89d7-3590fc8a883a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Aspire_V4_ALT-8B-Model_Stock/1762652579.565274",
- "retrieved_timestamp": "1762652579.565275",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Aspire_V4_ALT-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Aspire_V4_ALT-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7365933500888753
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5268232518944024
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18126888217522658
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3920416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3681848404255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Asymmetric_Linearity-8B-Model_Stock/ad58e69a-0917-4375-9e83-5db2ad50d0ca.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Asymmetric_Linearity-8B-Model_Stock/ad58e69a-0917-4375-9e83-5db2ad50d0ca.json
deleted file mode 100644
index e50673e7015969f2855448c6d8a928f725265b89..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Asymmetric_Linearity-8B-Model_Stock/ad58e69a-0917-4375-9e83-5db2ad50d0ca.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Asymmetric_Linearity-8B-Model_Stock/1762652579.5654871",
- "retrieved_timestamp": "1762652579.565488",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Asymmetric_Linearity-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Asymmetric_Linearity-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7174341857382855
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.546535755155883
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145973154362416
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41994791666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3843916223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 4.015
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aurora_faustus-8B-LINEAR/c8b72a17-837a-45ed-b285-bf472a4f6d45.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aurora_faustus-8B-LINEAR/c8b72a17-837a-45ed-b285-bf472a4f6d45.json
deleted file mode 100644
index cc9f005454fab20cba11b030029be8ae1512d6a6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aurora_faustus-8B-LINEAR/c8b72a17-837a-45ed-b285-bf472a4f6d45.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Aurora_faustus-8B-LINEAR/1762652579.565701",
- "retrieved_timestamp": "1762652579.565702",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Aurora_faustus-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Aurora_faustus-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7281003293483512
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5515538279425277
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17069486404833836
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4145833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3842253989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aurora_faustus-8B-LORABLATED/05707286-d03b-4cb2-9a0f-48245c867cc7.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aurora_faustus-8B-LORABLATED/05707286-d03b-4cb2-9a0f-48245c867cc7.json
deleted file mode 100644
index 55aea1d9b182b1659e5f016333a34e564804b96b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aurora_faustus-8B-LORABLATED/05707286-d03b-4cb2-9a0f-48245c867cc7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Aurora_faustus-8B-LORABLATED/1762652579.565921",
- "retrieved_timestamp": "1762652579.565921",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Aurora_faustus-8B-LORABLATED",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Aurora_faustus-8B-LORABLATED"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7527050448365891
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.539159616655651
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1487915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42385416666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36727061170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aurora_faustus-8B-LORABLATED_ALT/2b644863-f52f-487a-85d1-3fc3ce973d90.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aurora_faustus-8B-LORABLATED_ALT/2b644863-f52f-487a-85d1-3fc3ce973d90.json
deleted file mode 100644
index 0285f5489b64e254d9e56b0db6e208ee6577541d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Aurora_faustus-8B-LORABLATED_ALT/2b644863-f52f-487a-85d1-3fc3ce973d90.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Aurora_faustus-8B-LORABLATED_ALT/1762652579.566129",
- "retrieved_timestamp": "1762652579.56613",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Aurora_faustus-8B-LORABLATED_ALT",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Aurora_faustus-8B-LORABLATED_ALT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7377923908562614
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5387670721191214
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15861027190332327
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4225208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36943151595744683
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Autumn_Dawn-8B-LINEAR/4f1d1b68-311f-4409-bf5b-41629a889da3.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Autumn_Dawn-8B-LINEAR/4f1d1b68-311f-4409-bf5b-41629a889da3.json
deleted file mode 100644
index a6e629a23db71c42e1c97c80a3431344e25282dc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Autumn_Dawn-8B-LINEAR/4f1d1b68-311f-4409-bf5b-41629a889da3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Autumn_Dawn-8B-LINEAR/1762652579.566346",
- "retrieved_timestamp": "1762652579.5663471",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Autumn_Dawn-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Autumn_Dawn-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7292993701157373
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5459436958014627
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18580060422960726
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4185520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39677526595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel-8B-LINEAR/f3af4295-9508-4a3e-ba5a-6336a560fd6c.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel-8B-LINEAR/f3af4295-9508-4a3e-ba5a-6336a560fd6c.json
deleted file mode 100644
index dfad81f64921d207f4619ce75cbec9a7d1eea62c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel-8B-LINEAR/f3af4295-9508-4a3e-ba5a-6336a560fd6c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel-8B-LINEAR/1762652579.56655",
- "retrieved_timestamp": "1762652579.566551",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/BaeZel-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/BaeZel-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7377923908562614
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5463800554321383
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18126888217522658
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4227083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3861369680851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel-8B-Model_Stock/31395ff6-82da-4585-85d6-459fcac9408f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel-8B-Model_Stock/31395ff6-82da-4585-85d6-459fcac9408f.json
deleted file mode 100644
index a8f953dd262e9950c4503b059afc7df8e1d502b9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel-8B-Model_Stock/31395ff6-82da-4585-85d6-459fcac9408f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel-8B-Model_Stock/1762652579.566763",
- "retrieved_timestamp": "1762652579.566764",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/BaeZel-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/BaeZel-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7713145564878965
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5407680550216925
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16389728096676737
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41991666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38804853723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel_V2-8B-Model_Stock/cdacd0e9-fa22-4053-b16d-d3bac8541829.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel_V2-8B-Model_Stock/cdacd0e9-fa22-4053-b16d-d3bac8541829.json
deleted file mode 100644
index 244bf1daf99dfc1834d11222299297776f92277b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel_V2-8B-Model_Stock/cdacd0e9-fa22-4053-b16d-d3bac8541829.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel_V2-8B-Model_Stock/1762652579.566977",
- "retrieved_timestamp": "1762652579.566978",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/BaeZel_V2-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/BaeZel_V2-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7676675665013276
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5373871612758611
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4185833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3946974734042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel_V2_ALT-8B-Model_Stock/08ac7c80-0f13-43c9-a538-683eb6927b59.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel_V2_ALT-8B-Model_Stock/08ac7c80-0f13-43c9-a538-683eb6927b59.json
deleted file mode 100644
index 8afe613870ef34ecf6b2c81d225966c5de267485..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel_V2_ALT-8B-Model_Stock/08ac7c80-0f13-43c9-a538-683eb6927b59.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel_V2_ALT-8B-Model_Stock/1762652579.567195",
- "retrieved_timestamp": "1762652579.567196",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/BaeZel_V2_ALT-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/BaeZel_V2_ALT-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7676675665013276
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5373871612758611
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4185833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3946974734042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel_V3-8B-Model_Stock/91ec0c61-73ca-463f-b3be-3386293e4fc0.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel_V3-8B-Model_Stock/91ec0c61-73ca-463f-b3be-3386293e4fc0.json
deleted file mode 100644
index 6a6fea4daf7983838a44c6f4bf3318f8ffaa23ab..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BaeZel_V3-8B-Model_Stock/91ec0c61-73ca-463f-b3be-3386293e4fc0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_BaeZel_V3-8B-Model_Stock/1762652579.5674188",
- "retrieved_timestamp": "1762652579.56742",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/BaeZel_V3-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/BaeZel_V3-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7831797408653485
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.539231076759135
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18957703927492447
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41743749999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3887965425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Blunt_Edge-8B-SLERP/35807c64-beed-4022-a4ba-1284c5f6124f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Blunt_Edge-8B-SLERP/35807c64-beed-4022-a4ba-1284c5f6124f.json
deleted file mode 100644
index 5cde93855c1b9d443b7f2729667b789987713f91..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Blunt_Edge-8B-SLERP/35807c64-beed-4022-a4ba-1284c5f6124f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Blunt_Edge-8B-SLERP/1762652579.567633",
- "retrieved_timestamp": "1762652579.5676339",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Blunt_Edge-8B-SLERP",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Blunt_Edge-8B-SLERP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7496575752337131
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5389470863694941
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18580060422960726
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.417375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37666223404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BulkUp/3c2e7750-3257-4012-8b43-44387707170c.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BulkUp/3c2e7750-3257-4012-8b43-44387707170c.json
deleted file mode 100644
index 1ae3a6f226ec127ae39cc5cf40ff964807cfaacc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_BulkUp/3c2e7750-3257-4012-8b43-44387707170c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_BulkUp/1762652579.567868",
- "retrieved_timestamp": "1762652579.567869",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/BulkUp",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/BulkUp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.177804891022487
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28698602947692575
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24748322147651006
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3446666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11095412234042554
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Cadence-8B-LINEAR/8be55d6b-7fe0-41cf-86a6-66327dd88003.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Cadence-8B-LINEAR/8be55d6b-7fe0-41cf-86a6-66327dd88003.json
deleted file mode 100644
index 09b67b6c604700e0436efd1b7b386db07972a218..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Cadence-8B-LINEAR/8be55d6b-7fe0-41cf-86a6-66327dd88003.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Cadence-8B-LINEAR/1762652579.568077",
- "retrieved_timestamp": "1762652579.568078",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Cadence-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Cadence-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7682172192006099
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5433358555450108
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16767371601208458
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41734374999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3803191489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Caelid-8B-Model_Stock/8b15f9a3-6f39-4210-b48f-4dc5569114e2.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Caelid-8B-Model_Stock/8b15f9a3-6f39-4210-b48f-4dc5569114e2.json
deleted file mode 100644
index ebb4c22fffee1f902e3e711af99e4a59c53bdab5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Caelid-8B-Model_Stock/8b15f9a3-6f39-4210-b48f-4dc5569114e2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Caelid-8B-Model_Stock/1762652579.5682912",
- "retrieved_timestamp": "1762652579.5682921",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Caelid-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Caelid-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7247281657114235
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5459605196913864
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1510574018126888
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4001041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3816489361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Casuar-9B-Model_Stock/7c5c8fd8-2fbb-41f3-88f3-92a544200204.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Casuar-9B-Model_Stock/7c5c8fd8-2fbb-41f3-88f3-92a544200204.json
deleted file mode 100644
index 6e6613787a784fa12c74f42a23401cf88fe3738e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Casuar-9B-Model_Stock/7c5c8fd8-2fbb-41f3-88f3-92a544200204.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Casuar-9B-Model_Stock/1762652579.5685189",
- "retrieved_timestamp": "1762652579.5685189",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Casuar-9B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Casuar-9B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7764852812759035
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6106681877306871
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21299093655589124
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3447986577181208
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41654166666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4156416223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Condensed_Milk-8B-Model_Stock/58573d8e-602a-4088-8dec-a738b7e55e9c.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Condensed_Milk-8B-Model_Stock/58573d8e-602a-4088-8dec-a738b7e55e9c.json
deleted file mode 100644
index a17811bfaf8e089fe16f481fabc8bcd12ef527b6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Condensed_Milk-8B-Model_Stock/58573d8e-602a-4088-8dec-a738b7e55e9c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Condensed_Milk-8B-Model_Stock/1762652579.568758",
- "retrieved_timestamp": "1762652579.568759",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Condensed_Milk-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Condensed_Milk-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7536292592543341
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5434864122121906
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17447129909365558
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41601041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38763297872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_CoolerCoder-8B-LINEAR/b3bc4e42-5850-45bd-a0a1-ff6779c04fce.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_CoolerCoder-8B-LINEAR/b3bc4e42-5850-45bd-a0a1-ff6779c04fce.json
deleted file mode 100644
index 3652f21000ac6d2db2a87e7b8ca64f1dd3115938..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_CoolerCoder-8B-LINEAR/b3bc4e42-5850-45bd-a0a1-ff6779c04fce.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_CoolerCoder-8B-LINEAR/1762652579.568993",
- "retrieved_timestamp": "1762652579.568993",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/CoolerCoder-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/CoolerCoder-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4519286603988528
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4761504835496542
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07930513595166164
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3963541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31590757978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Damasteel-8B-LINEAR/b0a2ef10-8705-4eae-892d-51f3633dcd87.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Damasteel-8B-LINEAR/b0a2ef10-8705-4eae-892d-51f3633dcd87.json
deleted file mode 100644
index 215188c0e60aca4003506e8aa69325674de5e27d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Damasteel-8B-LINEAR/b0a2ef10-8705-4eae-892d-51f3633dcd87.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Damasteel-8B-LINEAR/1762652579.569221",
- "retrieved_timestamp": "1762652579.569222",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Damasteel-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Damasteel-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7384417789243651
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5388142176959776
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16691842900302115
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42124999999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3779089095744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Dearly_Beloved-8B-TIES/3d46ee0f-8ec0-4723-ac8d-fe88db7053c1.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Dearly_Beloved-8B-TIES/3d46ee0f-8ec0-4723-ac8d-fe88db7053c1.json
deleted file mode 100644
index 6450f21ef59015c45ea7b3faadeec19f2206da11..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Dearly_Beloved-8B-TIES/3d46ee0f-8ec0-4723-ac8d-fe88db7053c1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Dearly_Beloved-8B-TIES/1762652579.569437",
- "retrieved_timestamp": "1762652579.569438",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Dearly_Beloved-8B-TIES",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Dearly_Beloved-8B-TIES"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8266687943545348
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4049833102731906
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21148036253776434
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41746875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2826628989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Decayed-8B-LINEAR/5658866d-fd86-4203-b14f-84f9a4784028.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Decayed-8B-LINEAR/5658866d-fd86-4203-b14f-84f9a4784028.json
deleted file mode 100644
index d6f6a4b479fc1fe28a594e1fa6775a3b473f16e2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Decayed-8B-LINEAR/5658866d-fd86-4203-b14f-84f9a4784028.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Decayed-8B-LINEAR/1762652579.569654",
- "retrieved_timestamp": "1762652579.569655",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Decayed-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Decayed-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7676176988169169
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5417014088773181
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1714501510574018
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4186145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37632978723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative-8B-Model_Stock/9ef7e716-8638-46ac-a455-f601c1cfddc1.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative-8B-Model_Stock/9ef7e716-8638-46ac-a455-f601c1cfddc1.json
deleted file mode 100644
index cc763e3c073def93d4bde1b0f33b58b4a4333849..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative-8B-Model_Stock/9ef7e716-8638-46ac-a455-f601c1cfddc1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative-8B-Model_Stock/1762652579.569859",
- "retrieved_timestamp": "1762652579.56986",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Derivative-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Derivative-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7667433520835827
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5395493987763994
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17900302114803626
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31711409395973156
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42004166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3810671542553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative_V2-8B-Model_Stock/3320dceb-b5ef-4267-81d3-b6fe2a415eee.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative_V2-8B-Model_Stock/3320dceb-b5ef-4267-81d3-b6fe2a415eee.json
deleted file mode 100644
index 966a070c0bd1c366972c37acb4295428a162764b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative_V2-8B-Model_Stock/3320dceb-b5ef-4267-81d3-b6fe2a415eee.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative_V2-8B-Model_Stock/1762652579.5701172",
- "retrieved_timestamp": "1762652579.570118",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Derivative_V2-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Derivative_V2-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7536791269387447
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5392643954415269
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41229166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38563829787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative_V2_ALT-8B-Model_Stock/ac19b0a8-1955-4bab-b7ae-451a84dc09c6.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative_V2_ALT-8B-Model_Stock/ac19b0a8-1955-4bab-b7ae-451a84dc09c6.json
deleted file mode 100644
index 8dc10f445311a113079946c3710e60bb0ee95694..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative_V2_ALT-8B-Model_Stock/ac19b0a8-1955-4bab-b7ae-451a84dc09c6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative_V2_ALT-8B-Model_Stock/1762652579.570343",
- "retrieved_timestamp": "1762652579.570344",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Derivative_V2_ALT-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Derivative_V2_ALT-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7719639445560003
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5365351570462934
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18806646525679757
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41346875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38821476063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative_V3-8B-Model_Stock/54f51897-7b47-4e95-9c1a-58ecd64caa96.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative_V3-8B-Model_Stock/54f51897-7b47-4e95-9c1a-58ecd64caa96.json
deleted file mode 100644
index af833e3518787ba9b813d33de8a7e87766bf0a37..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Derivative_V3-8B-Model_Stock/54f51897-7b47-4e95-9c1a-58ecd64caa96.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Derivative_V3-8B-Model_Stock/1762652579.570688",
- "retrieved_timestamp": "1762652579.570689",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Derivative_V3-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Derivative_V3-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6963767248677952
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.524319745545524
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14652567975830816
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4149895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35023271276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Elusive_Dragon_Heart-8B-LINEAR/fbc53f61-cb3b-4f85-a724-fc07c6912c22.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Elusive_Dragon_Heart-8B-LINEAR/fbc53f61-cb3b-4f85-a724-fc07c6912c22.json
deleted file mode 100644
index 81bbba4e2171090414912476109cda645c407a22..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Elusive_Dragon_Heart-8B-LINEAR/fbc53f61-cb3b-4f85-a724-fc07c6912c22.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Elusive_Dragon_Heart-8B-LINEAR/1762652579.570945",
- "retrieved_timestamp": "1762652579.570946",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Elusive_Dragon_Heart-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Elusive_Dragon_Heart-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7131378076836128
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5456414280881592
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14803625377643503
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4145520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3813996010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 4.015
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Emu_Eggs-9B-Model_Stock/9343177e-5432-47c7-9fb6-90f2dc9125e5.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Emu_Eggs-9B-Model_Stock/9343177e-5432-47c7-9fb6-90f2dc9125e5.json
deleted file mode 100644
index 124f61de381eedf6d9839a1caee59bdf407c41b7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Emu_Eggs-9B-Model_Stock/9343177e-5432-47c7-9fb6-90f2dc9125e5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Emu_Eggs-9B-Model_Stock/1762652579.571181",
- "retrieved_timestamp": "1762652579.571182",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Emu_Eggs-9B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Emu_Eggs-9B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7606982805622415
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6051657213517168
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20996978851963746
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33305369127516776
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4070833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4227061170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Eunoia_Vespera-8B-LINEAR/5a835cef-3db8-40c9-8ae3-022d0719c89e.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Eunoia_Vespera-8B-LINEAR/5a835cef-3db8-40c9-8ae3-022d0719c89e.json
deleted file mode 100644
index 6f1ba7362d97b44e1033940ecca75cc22b2f671a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Eunoia_Vespera-8B-LINEAR/5a835cef-3db8-40c9-8ae3-022d0719c89e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Eunoia_Vespera-8B-LINEAR/1762652579.571407",
- "retrieved_timestamp": "1762652579.571407",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Eunoia_Vespera-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Eunoia_Vespera-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7235291249440374
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5399310621081937
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1540785498489426
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4184895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38389295212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Fu_sion_HA-8B-SLERP/5d6eb91b-518c-41ae-9e52-bb741b005601.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Fu_sion_HA-8B-SLERP/5d6eb91b-518c-41ae-9e52-bb741b005601.json
deleted file mode 100644
index d5130f4c9265bb6bc0ee9b86f4a6941dc919c1f7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Fu_sion_HA-8B-SLERP/5d6eb91b-518c-41ae-9e52-bb741b005601.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Fu_sion_HA-8B-SLERP/1762652579.57162",
- "retrieved_timestamp": "1762652579.5716212",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Fu_sion_HA-8B-SLERP",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Fu_sion_HA-8B-SLERP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7609232392274721
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5372804197028272
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17522658610271905
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32298657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41601041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38248005319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_HOT_STINKING_GARBAGE/70471d77-adb1-49df-ab72-8f43f379ab23.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_HOT_STINKING_GARBAGE/70471d77-adb1-49df-ab72-8f43f379ab23.json
deleted file mode 100644
index 9e69daa4ead02f79bcb1a84a7e6ea466e4a16ea4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_HOT_STINKING_GARBAGE/70471d77-adb1-49df-ab72-8f43f379ab23.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_HOT_STINKING_GARBAGE/1762652579.571834",
- "retrieved_timestamp": "1762652579.5718348",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/HOT_STINKING_GARBAGE",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/HOT_STINKING_GARBAGE"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5754265349273262
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4884000866161456
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06722054380664652
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42500000000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30169547872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_H_the_eighth-8B-LINEAR/2bbec710-ce13-4fa3-861b-fce8eee26b3b.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_H_the_eighth-8B-LINEAR/2bbec710-ce13-4fa3-861b-fce8eee26b3b.json
deleted file mode 100644
index 5502a033d5fef229b52c9f6e48263d67b18d9b6d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_H_the_eighth-8B-LINEAR/2bbec710-ce13-4fa3-861b-fce8eee26b3b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_H_the_eighth-8B-LINEAR/1762652579.572039",
- "retrieved_timestamp": "1762652579.5720398",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/H_the_eighth-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/H_the_eighth-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7469347996648892
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5383752114303682
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17749244712990936
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41728125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3823969414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Happy_New_Year-8B-Model_Stock/170808e4-7506-44c9-8bb7-5dd92037a347.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Happy_New_Year-8B-Model_Stock/170808e4-7506-44c9-8bb7-5dd92037a347.json
deleted file mode 100644
index 3c8b5302c6bc9b129341d34331ed0e29bef46142..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Happy_New_Year-8B-Model_Stock/170808e4-7506-44c9-8bb7-5dd92037a347.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Happy_New_Year-8B-Model_Stock/1762652579.572258",
- "retrieved_timestamp": "1762652579.5722592",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Happy_New_Year-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Happy_New_Year-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7615726272955757
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5367913866457493
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1593655589123867
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4185520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3878823138297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Heart_Stolen-8B-Model_Stock/86b9c040-4c5e-413d-ac23-1603c499b5de.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Heart_Stolen-8B-Model_Stock/86b9c040-4c5e-413d-ac23-1603c499b5de.json
deleted file mode 100644
index 37464c94bf2ad5347c9ec3be55b16edd689cc7e9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Heart_Stolen-8B-Model_Stock/86b9c040-4c5e-413d-ac23-1603c499b5de.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Heart_Stolen-8B-Model_Stock/1762652579.572714",
- "retrieved_timestamp": "1762652579.5727181",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Heart_Stolen-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Heart_Stolen-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7244533393617822
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5395443745186658
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17220543806646527
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31711409395973156
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41622916666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37940492021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Heart_Stolen-ALT-8B-Model_Stock/141d8908-50cb-4457-a0f0-93d55d1c705b.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Heart_Stolen-ALT-8B-Model_Stock/141d8908-50cb-4457-a0f0-93d55d1c705b.json
deleted file mode 100644
index 176bb423bff6662899c2bec0b71692f1331df8b2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Heart_Stolen-ALT-8B-Model_Stock/141d8908-50cb-4457-a0f0-93d55d1c705b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Heart_Stolen-ALT-8B-Model_Stock/1762652579.573096",
- "retrieved_timestamp": "1762652579.573097",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Heart_Stolen-ALT-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Heart_Stolen-ALT-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7183584001560305
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.526338467747489
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15634441087613293
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40549999999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37724401595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Here_We_Go_Again-8B-SLERP/1c21cfd2-2b01-44d3-8daa-41493a743a75.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Here_We_Go_Again-8B-SLERP/1c21cfd2-2b01-44d3-8daa-41493a743a75.json
deleted file mode 100644
index d9c59156f431c60a1e6d8703a385d29b06475cfd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Here_We_Go_Again-8B-SLERP/1c21cfd2-2b01-44d3-8daa-41493a743a75.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Here_We_Go_Again-8B-SLERP/1762652579.573366",
- "retrieved_timestamp": "1762652579.573367",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Here_We_Go_Again-8B-SLERP",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Here_We_Go_Again-8B-SLERP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7442120240960651
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5460182474181831
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1729607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3187919463087248
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4186770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3873005319148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 4.015
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Howdy-8B-LINEAR/88df4a25-089c-4f21-b403-a1f5dad112b3.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Howdy-8B-LINEAR/88df4a25-089c-4f21-b403-a1f5dad112b3.json
deleted file mode 100644
index e2c569e5d8572f91ff78109ffc43964788f155fd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Howdy-8B-LINEAR/88df4a25-089c-4f21-b403-a1f5dad112b3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Howdy-8B-LINEAR/1762652579.573699",
- "retrieved_timestamp": "1762652579.5737002",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Howdy-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Howdy-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7377923908562614
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5383981582614435
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17749244712990936
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145973154362416
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41213541666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3806515957446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Incidental-8B-Model_Stock/102ed90e-cbe3-4219-b9c6-cec82c78941f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Incidental-8B-Model_Stock/102ed90e-cbe3-4219-b9c6-cec82c78941f.json
deleted file mode 100644
index 9e62e6efdfddd9ec906f63670b6082be1de75bac..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Incidental-8B-Model_Stock/102ed90e-cbe3-4219-b9c6-cec82c78941f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Incidental-8B-Model_Stock/1762652579.573979",
- "retrieved_timestamp": "1762652579.5739799",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Incidental-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Incidental-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.748183708116686
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5452070612873019
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16163141993957703
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42401041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3873005319148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Irina-8B-model_stock/60aebc6f-b3ee-4b32-8b89-4359c990fb23.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Irina-8B-model_stock/60aebc6f-b3ee-4b32-8b89-4359c990fb23.json
deleted file mode 100644
index cf7e83e00701459b1e1aa24bb298de28edaf4cf1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Irina-8B-model_stock/60aebc6f-b3ee-4b32-8b89-4359c990fb23.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Irina-8B-model_stock/1762652579.574285",
- "retrieved_timestamp": "1762652579.574286",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Irina-8B-model_stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Irina-8B-model_stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6799403360860294
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5236638956084764
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10196374622356495
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40029166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35738031914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Kindling-8B-Model_Stock/8ee9ad54-c6ca-4afc-931b-ffe1fd1d5971.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Kindling-8B-Model_Stock/8ee9ad54-c6ca-4afc-931b-ffe1fd1d5971.json
deleted file mode 100644
index ed0a794aeac5eee5b71dacf668352a916b0080c3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Kindling-8B-Model_Stock/8ee9ad54-c6ca-4afc-931b-ffe1fd1d5971.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Kindling-8B-Model_Stock/1762652579.57468",
- "retrieved_timestamp": "1762652579.574682",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Kindling-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Kindling-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7308231049171753
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5492054832931256
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17522658610271905
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179530201342282
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4068333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3829787234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_L3.1-BaeZel-8B-Della/6c7dfbaf-648e-4c4a-907f-8639ab1c7312.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_L3.1-BaeZel-8B-Della/6c7dfbaf-648e-4c4a-907f-8639ab1c7312.json
deleted file mode 100644
index fffb2b7d12ec220b8c0034cf159a96721c640cd9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_L3.1-BaeZel-8B-Della/6c7dfbaf-648e-4c4a-907f-8639ab1c7312.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_L3.1-BaeZel-8B-Della/1762652579.575009",
- "retrieved_timestamp": "1762652579.57501",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/L3.1-BaeZel-8B-Della",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/L3.1-BaeZel-8B-Della"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5180243974875552
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5448449542185521
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17447129909365558
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4199791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3902094414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Laughing_Stock-8B-Model_Stock/cf1b2ab2-d18b-44c1-b0ed-476dba32c034.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Laughing_Stock-8B-Model_Stock/cf1b2ab2-d18b-44c1-b0ed-476dba32c034.json
deleted file mode 100644
index 0255c1efbd6bbdef31ae1c15315240320de07aa7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Laughing_Stock-8B-Model_Stock/cf1b2ab2-d18b-44c1-b0ed-476dba32c034.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Laughing_Stock-8B-Model_Stock/1762652579.5752351",
- "retrieved_timestamp": "1762652579.575236",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Laughing_Stock-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Laughing_Stock-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7189579205397235
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5449429262155
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1578549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4145520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3764128989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Lava_Lamp-8B-SLERP/26d89e91-7f52-4913-a4e0-3275cca1d8d7.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Lava_Lamp-8B-SLERP/26d89e91-7f52-4913-a4e0-3275cca1d8d7.json
deleted file mode 100644
index 72514129bd0ff3226d89d5277622bef9a39b06fa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Lava_Lamp-8B-SLERP/26d89e91-7f52-4913-a4e0-3275cca1d8d7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Lava_Lamp-8B-SLERP/1762652579.575455",
- "retrieved_timestamp": "1762652579.575455",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Lava_Lamp-8B-SLERP",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Lava_Lamp-8B-SLERP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7381170848903134
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5367586873360172
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17371601208459214
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4187083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.375
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_LemonP-8B-Model_Stock/f13fb9a9-f53c-4c7e-9e29-fabb010a617b.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_LemonP-8B-Model_Stock/f13fb9a9-f53c-4c7e-9e29-fabb010a617b.json
deleted file mode 100644
index 4c72d7b2a6a6ff09308fe73e6800a8365c023057..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_LemonP-8B-Model_Stock/f13fb9a9-f53c-4c7e-9e29-fabb010a617b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_LemonP-8B-Model_Stock/1762652579.575685",
- "retrieved_timestamp": "1762652579.575686",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/LemonP-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/LemonP-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7676176988169169
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5439348074265458
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17673716012084592
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40810416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40043218085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Lydia_of_Whiterun-8B-LINEAR/cee29aba-b6c1-42a2-88d0-a92080b3c083.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Lydia_of_Whiterun-8B-LINEAR/cee29aba-b6c1-42a2-88d0-a92080b3c083.json
deleted file mode 100644
index 0105a8bb29179e9adc17e8b3aaa52512e1164dd0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Lydia_of_Whiterun-8B-LINEAR/cee29aba-b6c1-42a2-88d0-a92080b3c083.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Lydia_of_Whiterun-8B-LINEAR/1762652579.575901",
- "retrieved_timestamp": "1762652579.575901",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Lydia_of_Whiterun-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Lydia_of_Whiterun-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.760323718843779
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5379527944750039
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17673716012084592
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42506249999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3800698138297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Matryoshka-8B-LINEAR/2f8ce822-9278-49e5-878a-69439e794623.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Matryoshka-8B-LINEAR/2f8ce822-9278-49e5-878a-69439e794623.json
deleted file mode 100644
index 202fb4f1ef78ffc65bb27e2ed79dc7a02a00e5c3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Matryoshka-8B-LINEAR/2f8ce822-9278-49e5-878a-69439e794623.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Matryoshka-8B-LINEAR/1762652579.576119",
- "retrieved_timestamp": "1762652579.5761201",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Matryoshka-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Matryoshka-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7262519005128614
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5444280006376178
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17522658610271905
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42524999999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3865525265957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Mercury_In_Retrograde-8b-Model-Stock/eff11f37-ec26-4866-8109-0ee6dcac7fec.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Mercury_In_Retrograde-8b-Model-Stock/eff11f37-ec26-4866-8109-0ee6dcac7fec.json
deleted file mode 100644
index 7605ab3318d32341603f1bf7aea8cb3d02de0f3a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Mercury_In_Retrograde-8b-Model-Stock/eff11f37-ec26-4866-8109-0ee6dcac7fec.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Mercury_In_Retrograde-8b-Model-Stock/1762652579.576331",
- "retrieved_timestamp": "1762652579.576332",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Mercury_In_Retrograde-8b-Model-Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Mercury_In_Retrograde-8b-Model-Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7296240641497892
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5390507664719518
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4198854166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38289561170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minthy-8B-Model_Stock/394ac507-8bdb-4d06-bf6e-87911443ec2b.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minthy-8B-Model_Stock/394ac507-8bdb-4d06-bf6e-87911443ec2b.json
deleted file mode 100644
index 6eee39abf7eae6431aa05f84c97affbf5c45ba67..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minthy-8B-Model_Stock/394ac507-8bdb-4d06-bf6e-87911443ec2b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Minthy-8B-Model_Stock/1762652579.5765939",
- "retrieved_timestamp": "1762652579.5765948",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Minthy-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Minthy-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.765769269981427
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5352951319641014
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19184290030211482
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40940624999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3992686170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minthy_ALT-8B-Model_Stock/709e429f-0a98-4ae6-b10f-f0546ef2d9b5.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minthy_ALT-8B-Model_Stock/709e429f-0a98-4ae6-b10f-f0546ef2d9b5.json
deleted file mode 100644
index d16b0f35c4cc2c21a0193ab15d8c8daf8c297359..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minthy_ALT-8B-Model_Stock/709e429f-0a98-4ae6-b10f-f0546ef2d9b5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Minthy_ALT-8B-Model_Stock/1762652579.57681",
- "retrieved_timestamp": "1762652579.576811",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Minthy_ALT-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Minthy_ALT-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6991992358054406
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5374800202589046
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17598187311178248
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4225208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3673537234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minthy_V2-8B-Model_Stock/3f8011c6-6826-4788-b848-ec6938eefa7f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minthy_V2-8B-Model_Stock/3f8011c6-6826-4788-b848-ec6938eefa7f.json
deleted file mode 100644
index bd82fdf4610d9f7fb832edf85e207dd474d97c55..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minthy_V2-8B-Model_Stock/3f8011c6-6826-4788-b848-ec6938eefa7f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Minthy_V2-8B-Model_Stock/1762652579.5770218",
- "retrieved_timestamp": "1762652579.577023",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Minthy_V2-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Minthy_V2-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7125881549843305
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5491095928821667
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1593655589123867
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4198854166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37367021276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minus_Penus-8B-Model_Stock/395b9855-e394-46c9-b95a-75203399aed4.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minus_Penus-8B-Model_Stock/395b9855-e394-46c9-b95a-75203399aed4.json
deleted file mode 100644
index 8b32069def018577458e9e30d6b59aa83f224271..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Minus_Penus-8B-Model_Stock/395b9855-e394-46c9-b95a-75203399aed4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Minus_Penus-8B-Model_Stock/1762652579.577236",
- "retrieved_timestamp": "1762652579.577237",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Minus_Penus-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Minus_Penus-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7311477989512272
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5343781571200968
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2001510574018127
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40190624999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3751662234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Not_Even_My_Final_Form-8B-Model_Stock/bc85d435-a537-4ed0-bf4e-02d9c30b5fa3.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Not_Even_My_Final_Form-8B-Model_Stock/bc85d435-a537-4ed0-bf4e-02d9c30b5fa3.json
deleted file mode 100644
index 116766542f3ad6e7de0713678656c73d343290ec..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Not_Even_My_Final_Form-8B-Model_Stock/bc85d435-a537-4ed0-bf4e-02d9c30b5fa3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Not_Even_My_Final_Form-8B-Model_Stock/1762652579.577775",
- "retrieved_timestamp": "1762652579.5777762",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Not_Even_My_Final_Form-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7721889032212308
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5350849793007441
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17598187311178248
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41473958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3839760638297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Nother_One-8B-Model_Stock/464f363d-ab94-4cac-8846-fbcf25be3dec.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Nother_One-8B-Model_Stock/464f363d-ab94-4cac-8846-fbcf25be3dec.json
deleted file mode 100644
index 25711f8bd2f37ef3c13bec5be78fca37257d2668..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Nother_One-8B-Model_Stock/464f363d-ab94-4cac-8846-fbcf25be3dec.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Nother_One-8B-Model_Stock/1762652579.578036",
- "retrieved_timestamp": "1762652579.578037",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Nother_One-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Nother_One-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6863101016414226
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5204527600425481
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15181268882175228
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38702083333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35945811170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Noxis-8B-LINEAR/8778fbef-d0f0-4a47-8adb-8e8f594d9195.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Noxis-8B-LINEAR/8778fbef-d0f0-4a47-8adb-8e8f594d9195.json
deleted file mode 100644
index dd11d492689843f00967530e22dd0c0857372fbd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Noxis-8B-LINEAR/8778fbef-d0f0-4a47-8adb-8e8f594d9195.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Noxis-8B-LINEAR/1762652579.578263",
- "retrieved_timestamp": "1762652579.578263",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Noxis-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Noxis-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6913057354486096
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5420956502068554
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19788519637462235
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3187919463087248
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4230833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3660239361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Nullsworn-12B-LINEAR/3f92cd91-57b4-46eb-864b-2e4870b920fc.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Nullsworn-12B-LINEAR/3f92cd91-57b4-46eb-864b-2e4870b920fc.json
deleted file mode 100644
index 714856aa45edfcba46fd5a1e2edaa060774ec02f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Nullsworn-12B-LINEAR/3f92cd91-57b4-46eb-864b-2e4870b920fc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Nullsworn-12B-LINEAR/1762652579.578492",
- "retrieved_timestamp": "1762652579.5784929",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Nullsworn-12B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Nullsworn-12B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44356086295473784
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5483045026677609
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11253776435045318
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43495833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3645279255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Nwah-8B-Model_Stock/34dec14e-846a-4037-8dbd-f1d1599d5adf.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Nwah-8B-Model_Stock/34dec14e-846a-4037-8dbd-f1d1599d5adf.json
deleted file mode 100644
index ed1ad924ae656b7aaf6b34aeb4d65ef4c589f70d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Nwah-8B-Model_Stock/34dec14e-846a-4037-8dbd-f1d1599d5adf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Nwah-8B-Model_Stock/1762652579.578718",
- "retrieved_timestamp": "1762652579.578719",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Nwah-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Nwah-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7715893828375378
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5384269019541996
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4039479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3807347074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ONeil-model_stock-8B/7f5fa4e0-e28c-46df-acbd-22e7b010a407.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ONeil-model_stock-8B/7f5fa4e0-e28c-46df-acbd-22e7b010a407.json
deleted file mode 100644
index 4a4a7f16273ef9cb67cffe05f37f512fc3dc2eb0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ONeil-model_stock-8B/7f5fa4e0-e28c-46df-acbd-22e7b010a407.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_ONeil-model_stock-8B/1762652579.578939",
- "retrieved_timestamp": "1762652579.57894",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/ONeil-model_stock-8B",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/ONeil-model_stock-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6785662043378236
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5548337982400763
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10120845921450151
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41734374999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35987367021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Oh_Boy-8B-LINEAR/393ad85d-6b8b-466d-99e0-6a89bf0ce66e.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Oh_Boy-8B-LINEAR/393ad85d-6b8b-466d-99e0-6a89bf0ce66e.json
deleted file mode 100644
index 68abbce7f57a5c57d8f956ef8758fa907b756f21..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Oh_Boy-8B-LINEAR/393ad85d-6b8b-466d-99e0-6a89bf0ce66e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Oh_Boy-8B-LINEAR/1762652579.5791628",
- "retrieved_timestamp": "1762652579.5791638",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Oh_Boy-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Oh_Boy-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7503069633018169
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5375114406292553
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1782477341389728
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4107708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3848902925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_OrangeJ-8B-Model_Stock/d436f2a4-ebd5-4712-871a-0616f491bda4.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_OrangeJ-8B-Model_Stock/d436f2a4-ebd5-4712-871a-0616f491bda4.json
deleted file mode 100644
index 07b0a699eff7c6a3bb1eeb0c344b2120b99760f3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_OrangeJ-8B-Model_Stock/d436f2a4-ebd5-4712-871a-0616f491bda4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_OrangeJ-8B-Model_Stock/1762652579.57939",
- "retrieved_timestamp": "1762652579.579391",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/OrangeJ-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/OrangeJ-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7841039552830933
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5413478053905038
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17598187311178248
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4027708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3968583776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Promissum_Mane-8B-LINEAR-lorablated/827c075e-78a2-4e4b-a561-b95728cdf2b2.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Promissum_Mane-8B-LINEAR-lorablated/827c075e-78a2-4e4b-a561-b95728cdf2b2.json
deleted file mode 100644
index 4ce48a0783d18370d67a4d2bf8d930ad9168df4e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Promissum_Mane-8B-LINEAR-lorablated/827c075e-78a2-4e4b-a561-b95728cdf2b2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Promissum_Mane-8B-LINEAR-lorablated/1762652579.579823",
- "retrieved_timestamp": "1762652579.5798242",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Promissum_Mane-8B-LINEAR-lorablated",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Promissum_Mane-8B-LINEAR-lorablated"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7156356245872064
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5435183631990302
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15332326283987915
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4197916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37391954787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Promissum_Mane-8B-LINEAR/d44a7888-1463-4492-9359-f8287a8f7f01.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Promissum_Mane-8B-LINEAR/d44a7888-1463-4492-9359-f8287a8f7f01.json
deleted file mode 100644
index 63d99304488e2eac930079140b6c421d902eb583..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Promissum_Mane-8B-LINEAR/d44a7888-1463-4492-9359-f8287a8f7f01.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Promissum_Mane-8B-LINEAR/1762652579.5796108",
- "retrieved_timestamp": "1762652579.579612",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Promissum_Mane-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Promissum_Mane-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7150361042035134
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5457684398146738
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1555891238670695
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42004166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38505651595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_RPMash-8B-Model_Stock/aa8e7299-0c36-4f27-b8c9-e9a5e4da8c97.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_RPMash-8B-Model_Stock/aa8e7299-0c36-4f27-b8c9-e9a5e4da8c97.json
deleted file mode 100644
index 9654d3bd7889a8dd6c0ee603b742fd843242a45f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_RPMash-8B-Model_Stock/aa8e7299-0c36-4f27-b8c9-e9a5e4da8c97.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_RPMash-8B-Model_Stock/1762652579.5800488",
- "retrieved_timestamp": "1762652579.58005",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/RPMash-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/RPMash-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4563502617499346
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5169088291675549
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10800604229607251
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.405375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3603723404255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_RPMash_V3-8B-Model_Stock/c7e0c75d-f0c1-4a44-b540-607e99c69e92.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_RPMash_V3-8B-Model_Stock/c7e0c75d-f0c1-4a44-b540-607e99c69e92.json
deleted file mode 100644
index d9c6c8429fbfb8c2f069fe9942dbb2abbe46afb0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_RPMash_V3-8B-Model_Stock/c7e0c75d-f0c1-4a44-b540-607e99c69e92.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_RPMash_V3-8B-Model_Stock/1762652579.580262",
- "retrieved_timestamp": "1762652579.580263",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/RPMash_V3-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/RPMash_V3-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.70491961329273
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5217453397523113
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37775000000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36136968085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Rusted_Gold-8B-LINEAR/70f7842f-1111-4c6a-914d-35e48537d1fc.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Rusted_Gold-8B-LINEAR/70f7842f-1111-4c6a-914d-35e48537d1fc.json
deleted file mode 100644
index 867a4619ec1e22a78759da399c22301f493c831b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Rusted_Gold-8B-LINEAR/70f7842f-1111-4c6a-914d-35e48537d1fc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Rusted_Gold-8B-LINEAR/1762652579.58047",
- "retrieved_timestamp": "1762652579.580471",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Rusted_Gold-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Rusted_Gold-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7296240641497892
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5386646439313688
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1933534743202417
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41775
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37799202127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Rusted_Platinum-8B-LINEAR/4b9a1e5a-dc99-44d9-b4f4-6bef1eb285ca.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Rusted_Platinum-8B-LINEAR/4b9a1e5a-dc99-44d9-b4f4-6bef1eb285ca.json
deleted file mode 100644
index 0a20e5ce836fac2ccce2aab1e57f1aca469f22d5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Rusted_Platinum-8B-LINEAR/4b9a1e5a-dc99-44d9-b4f4-6bef1eb285ca.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Rusted_Platinum-8B-LINEAR/1762652579.580692",
- "retrieved_timestamp": "1762652579.580693",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Rusted_Platinum-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Rusted_Platinum-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7179838384375679
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5427868416987739
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17220543806646527
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39666666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37300531914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Rusted_Platinum-8B-Model_Stock/219e3183-8d9c-4188-a550-72d7f20ff1ec.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Rusted_Platinum-8B-Model_Stock/219e3183-8d9c-4188-a550-72d7f20ff1ec.json
deleted file mode 100644
index 119929b3551355a647120e647e05534c0e356d7c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Rusted_Platinum-8B-Model_Stock/219e3183-8d9c-4188-a550-72d7f20ff1ec.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Rusted_Platinum-8B-Model_Stock/1762652579.580914",
- "retrieved_timestamp": "1762652579.580915",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Rusted_Platinum-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Rusted_Platinum-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44078821970150317
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5242840148078765
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10196374622356495
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37406249999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3546376329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Sellen-8B-model_stock/45e281e8-f28c-40a5-92e4-c16b627adb32.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Sellen-8B-model_stock/45e281e8-f28c-40a5-92e4-c16b627adb32.json
deleted file mode 100644
index a0b5b091118d5639f841852d0e0027d6c5be277c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Sellen-8B-model_stock/45e281e8-f28c-40a5-92e4-c16b627adb32.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Sellen-8B-model_stock/1762652579.5811431",
- "retrieved_timestamp": "1762652579.581144",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Sellen-8B-model_stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Sellen-8B-model_stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7112893788481229
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5231680557624704
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1336858006042296
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3960416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35696476063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Something-8B-Model_Stock/1d1bf908-44fb-4b87-b52d-845a1cdafc08.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Something-8B-Model_Stock/1d1bf908-44fb-4b87-b52d-845a1cdafc08.json
deleted file mode 100644
index e20e1de5f95d61953915bfef98abcc0a7da2c742..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Something-8B-Model_Stock/1d1bf908-44fb-4b87-b52d-845a1cdafc08.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Something-8B-Model_Stock/1762652579.5815392",
- "retrieved_timestamp": "1762652579.58154",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Something-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Something-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5043107842746135
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5395029370473196
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31711409395973156
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41873958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3885472074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Spring_Dusk-8B-SCE/e9124a70-037d-41ed-becb-953382a3f43a.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Spring_Dusk-8B-SCE/e9124a70-037d-41ed-becb-953382a3f43a.json
deleted file mode 100644
index 7bb2dd3064cd2a82546cbb45c8430a7af1f290f1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Spring_Dusk-8B-SCE/e9124a70-037d-41ed-becb-953382a3f43a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Spring_Dusk-8B-SCE/1762652579.581773",
- "retrieved_timestamp": "1762652579.581774",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Spring_Dusk-8B-SCE",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Spring_Dusk-8B-SCE"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6514636719459922
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5635271357931001
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07628398791540786
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45997916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3435837765957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Dawn-8B-SCE/7d7eefa4-193a-4158-a903-9a8484b36e9a.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Dawn-8B-SCE/7d7eefa4-193a-4158-a903-9a8484b36e9a.json
deleted file mode 100644
index 6fc6bc4a1655ddd88826813e88f2a22f787f49a2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Dawn-8B-SCE/7d7eefa4-193a-4158-a903-9a8484b36e9a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Dawn-8B-SCE/1762652579.581994",
- "retrieved_timestamp": "1762652579.581994",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Summer_Dawn-8B-SCE",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Summer_Dawn-8B-SCE"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6642032030567783
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.539111375413361
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17220543806646527
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41204166666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37533244680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Dusk-8B-TIES/a2cad434-61a0-40be-8740-6c6a8e3cea25.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Dusk-8B-TIES/a2cad434-61a0-40be-8740-6c6a8e3cea25.json
deleted file mode 100644
index 3c8f275e5412301a28a268e69df658da570dec18..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Dusk-8B-TIES/a2cad434-61a0-40be-8740-6c6a8e3cea25.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Dusk-8B-TIES/1762652579.582258",
- "retrieved_timestamp": "1762652579.582258",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Summer_Dusk-8B-TIES",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Summer_Dusk-8B-TIES"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4922206412319312
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5359662578395569
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18051359516616314
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4266770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3855551861702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Rain-8B-SCE/9f4730ec-a162-455c-83ef-c8fa9ebd036c.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Rain-8B-SCE/9f4730ec-a162-455c-83ef-c8fa9ebd036c.json
deleted file mode 100644
index 7f34c06e7d9198fa03b34389cef5ed0752502f94..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Rain-8B-SCE/9f4730ec-a162-455c-83ef-c8fa9ebd036c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Rain-8B-SCE/1762652579.582465",
- "retrieved_timestamp": "1762652579.5824661",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Summer_Rain-8B-SCE",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Summer_Rain-8B-SCE"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5459259210007226
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5845948417986419
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0702416918429003
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4477291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3550531914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Rain-8B-TIES/1704c33f-e00e-4fbb-be4c-3d1fe85d635f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Rain-8B-TIES/1704c33f-e00e-4fbb-be4c-3d1fe85d635f.json
deleted file mode 100644
index f9f31e11a68d48540d310537116db6eaf0209efe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Summer_Rain-8B-TIES/1704c33f-e00e-4fbb-be4c-3d1fe85d635f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Summer_Rain-8B-TIES/1762652579.582679",
- "retrieved_timestamp": "1762652579.582679",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Summer_Rain-8B-TIES",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Summer_Rain-8B-TIES"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5444021861992845
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5845948417986419
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0702416918429003
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4477291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3550531914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Sun-8B-Model_Stock/13b16b8d-533f-4323-a75a-e16df96b8351.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Sun-8B-Model_Stock/13b16b8d-533f-4323-a75a-e16df96b8351.json
deleted file mode 100644
index 52aaf65a81581c8cf4c53ac6337eddc89875aa37..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Sun-8B-Model_Stock/13b16b8d-533f-4323-a75a-e16df96b8351.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Sun-8B-Model_Stock/1762652579.58288",
- "retrieved_timestamp": "1762652579.58288",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Sun-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Sun-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7758358932077998
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5263511014407583
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20996978851963746
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40975
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38347739361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Sweetened_Condensed_Milk-8B-Model_Stock/d0461daa-d106-44ce-9d9c-03a6fef37b45.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Sweetened_Condensed_Milk-8B-Model_Stock/d0461daa-d106-44ce-9d9c-03a6fef37b45.json
deleted file mode 100644
index 41608e6a0594b25097bb2770a89cd92519ea6416..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Sweetened_Condensed_Milk-8B-Model_Stock/d0461daa-d106-44ce-9d9c-03a6fef37b45.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Sweetened_Condensed_Milk-8B-Model_Stock/1762652579.5830941",
- "retrieved_timestamp": "1762652579.583095",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Sweetened_Condensed_Milk-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7417142071924716
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5406287643522295
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18731117824773413
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4106770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38480718085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST02-Ignore/414bb880-e2b2-43fb-ad9b-f51d7c4b7ad4.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST02-Ignore/414bb880-e2b2-43fb-ad9b-f51d7c4b7ad4.json
deleted file mode 100644
index 7b5aa339cc23207f28489c93d899b156516c430e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST02-Ignore/414bb880-e2b2-43fb-ad9b-f51d7c4b7ad4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_TEST02-Ignore/1762652579.583313",
- "retrieved_timestamp": "1762652579.583314",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/TEST02-Ignore",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/TEST02-Ignore"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6118964347930158
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5601644306147606
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08685800604229607
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41985416666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3468251329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST03-ignore/ceba83fe-89b2-4b8a-ba7d-ed1ad9acb070.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST03-ignore/ceba83fe-89b2-4b8a-ba7d-ed1ad9acb070.json
deleted file mode 100644
index 47b5c3573950ad9013509a6ed893e8ad3eb07c55..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST03-ignore/ceba83fe-89b2-4b8a-ba7d-ed1ad9acb070.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_TEST03-ignore/1762652579.583565",
- "retrieved_timestamp": "1762652579.5835662",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/TEST03-ignore",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/TEST03-ignore"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6967014189018471
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5383414134372179
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16540785498489427
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4186145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37890625
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST06-ignore/15dbba84-b177-4bcd-8874-0153152f0015.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST06-ignore/15dbba84-b177-4bcd-8874-0153152f0015.json
deleted file mode 100644
index b603b1f8c298f09fe0c1810ccf4af8640a5b018a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST06-ignore/15dbba84-b177-4bcd-8874-0153152f0015.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_TEST06-ignore/1762652579.583824",
- "retrieved_timestamp": "1762652579.5838249",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/TEST06-ignore",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/TEST06-ignore"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7322969720342026
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5509060880148441
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11782477341389729
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4224895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3615359042553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST07-ignore/39b77252-2729-429b-b220-3b19ca0b6a6c.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST07-ignore/39b77252-2729-429b-b220-3b19ca0b6a6c.json
deleted file mode 100644
index 4242a37fa60c85bd57ddd20008da203ca1d5796a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST07-ignore/39b77252-2729-429b-b220-3b19ca0b6a6c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_TEST07-ignore/1762652579.5841951",
- "retrieved_timestamp": "1762652579.584198",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/TEST07-ignore",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/TEST07-ignore"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7399655137258031
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5561275711510345
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1661631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40937500000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3879654255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST08-ignore/79b7bdb6-82a7-466f-8d9a-b26211f4ee73.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST08-ignore/79b7bdb6-82a7-466f-8d9a-b26211f4ee73.json
deleted file mode 100644
index abeccc73bb8ceeafb83775b19971c5b7b0184e1f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_TEST08-ignore/79b7bdb6-82a7-466f-8d9a-b26211f4ee73.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_TEST08-ignore/1762652579.5845299",
- "retrieved_timestamp": "1762652579.5845308",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/TEST08-ignore",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/TEST08-ignore"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7466599733152479
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5453519655444978
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18202416918429004
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40810416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3853058510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Trinas_Nectar-8B-model_stock/922fec6c-cfec-47cf-a374-5676635a5b40.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Trinas_Nectar-8B-model_stock/922fec6c-cfec-47cf-a374-5676635a5b40.json
deleted file mode 100644
index d9cf1082676e7226c9b03585b537a0e5ea3161a0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Trinas_Nectar-8B-model_stock/922fec6c-cfec-47cf-a374-5676635a5b40.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Trinas_Nectar-8B-model_stock/1762652579.58478",
- "retrieved_timestamp": "1762652579.5847821",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Trinas_Nectar-8B-model_stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Trinas_Nectar-8B-model_stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7259272064788096
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5256123853406084
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15256797583081572
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2860738255033557
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4067708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36178523936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_UNTESTED-VENN_1.2-8B-Model_Stock/5945660f-40e1-4c49-8f28-581f06b51e59.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_UNTESTED-VENN_1.2-8B-Model_Stock/5945660f-40e1-4c49-8f28-581f06b51e59.json
deleted file mode 100644
index 416b273e840fe45cc8fadfcb4e4326f0339307c8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_UNTESTED-VENN_1.2-8B-Model_Stock/5945660f-40e1-4c49-8f28-581f06b51e59.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_UNTESTED-VENN_1.2-8B-Model_Stock/1762652579.585024",
- "retrieved_timestamp": "1762652579.585025",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/UNTESTED-VENN_1.2-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47176270074513404
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5475027267486955
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1540785498489426
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31543624161073824
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4449375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.378656914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 4.015
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_VENN_1.2-8B-Model_Stock/0adfce8d-0070-4375-be96-a34466851101.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_VENN_1.2-8B-Model_Stock/0adfce8d-0070-4375-be96-a34466851101.json
deleted file mode 100644
index 7e9d0819e7144c82cfac1c825abcc32430e2d1fb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_VENN_1.2-8B-Model_Stock/0adfce8d-0070-4375-be96-a34466851101.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_VENN_1.2-8B-Model_Stock/1762652579.5852559",
- "retrieved_timestamp": "1762652579.585257",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/VENN_1.2-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/VENN_1.2-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7226049105262924
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5458812486333333
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17069486404833836
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42001041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3720910904255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 4.015
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_WIP-Acacia-8B-Model_Stock/d28bdd9d-53bb-498f-84cb-7d482f41d005.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_WIP-Acacia-8B-Model_Stock/d28bdd9d-53bb-498f-84cb-7d482f41d005.json
deleted file mode 100644
index dd970d1bca4ea43eb5bf47705ecc2d3656ea4b4f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_WIP-Acacia-8B-Model_Stock/d28bdd9d-53bb-498f-84cb-7d482f41d005.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_WIP-Acacia-8B-Model_Stock/1762652579.5854762",
- "retrieved_timestamp": "1762652579.585477",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/WIP-Acacia-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/WIP-Acacia-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6246359659038019
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5194665568943516
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16691842900302115
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4225833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37367021276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_WIP_Damascus-8B-TIES/38e5b086-4a73-4ffa-9b32-eb80405fecb5.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_WIP_Damascus-8B-TIES/38e5b086-4a73-4ffa-9b32-eb80405fecb5.json
deleted file mode 100644
index a273083453ed9fd1dd8d26cbccfb7fb9b56f890e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_WIP_Damascus-8B-TIES/38e5b086-4a73-4ffa-9b32-eb80405fecb5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_WIP_Damascus-8B-TIES/1762652579.5856981",
- "retrieved_timestamp": "1762652579.5856981",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/WIP_Damascus-8B-TIES",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/WIP_Damascus-8B-TIES"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4776326812856554
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5410672913070808
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16540785498489427
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41185416666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37608045212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Wannabe-8B-Model_Stock/fafc0425-a4f0-4c5b-8328-5dfca7d6402f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Wannabe-8B-Model_Stock/fafc0425-a4f0-4c5b-8328-5dfca7d6402f.json
deleted file mode 100644
index e36878b91e9d535f23ea580187f8f0ffbe126511..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Wannabe-8B-Model_Stock/fafc0425-a4f0-4c5b-8328-5dfca7d6402f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Wannabe-8B-Model_Stock/1762652579.585919",
- "retrieved_timestamp": "1762652579.58592",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Wannabe-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Wannabe-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7204816553411615
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5389637944785705
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17749244712990936
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41346875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.383061835106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_What_A_Thrill-8B-Model_Stock/b9fadd79-8220-4023-b92a-c38b07a90e8f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_What_A_Thrill-8B-Model_Stock/b9fadd79-8220-4023-b92a-c38b07a90e8f.json
deleted file mode 100644
index d3fcab7860be9e6e4ff3c71f584ece901bf569f0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_What_A_Thrill-8B-Model_Stock/b9fadd79-8220-4023-b92a-c38b07a90e8f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_What_A_Thrill-8B-Model_Stock/1762652579.5861409",
- "retrieved_timestamp": "1762652579.586142",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/What_A_Thrill-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/What_A_Thrill-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7064433480941679
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.531144904394377
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18202416918429004
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40804166666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3615359042553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter-8B-SCE/b351842a-aa2a-494a-8159-c732f071c7c6.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter-8B-SCE/b351842a-aa2a-494a-8159-c732f071c7c6.json
deleted file mode 100644
index eb6e9782d605d585bbadcb90b99a60849f32f401..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter-8B-SCE/b351842a-aa2a-494a-8159-c732f071c7c6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Winter-8B-SCE/1762652579.586359",
- "retrieved_timestamp": "1762652579.58636",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Winter-8B-SCE",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Winter-8B-SCE"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7536292592543341
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5261733490323383
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19184290030211482
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4070833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38389295212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter_Dawn-8B-TIES/21947721-9f9a-4cc2-aa88-e1853f488167.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter_Dawn-8B-TIES/21947721-9f9a-4cc2-aa88-e1853f488167.json
deleted file mode 100644
index a518a2260740a1cb7f7149ab7b9ef19da3cf9b9f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter_Dawn-8B-TIES/21947721-9f9a-4cc2-aa88-e1853f488167.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Winter_Dawn-8B-TIES/1762652579.586569",
- "retrieved_timestamp": "1762652579.58657",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Winter_Dawn-8B-TIES",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Winter_Dawn-8B-TIES"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5496482665992899
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5309416142154736
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18580060422960726
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42785416666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3910405585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter_Dusk-8B-TIES/cdc03c25-5bfb-4185-8e29-40e1af2ef253.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter_Dusk-8B-TIES/cdc03c25-5bfb-4185-8e29-40e1af2ef253.json
deleted file mode 100644
index 8a68e76c3f205eea6a613bbee64f70d7952bbf02..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter_Dusk-8B-TIES/cdc03c25-5bfb-4185-8e29-40e1af2ef253.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Winter_Dusk-8B-TIES/1762652579.586781",
- "retrieved_timestamp": "1762652579.586782",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Winter_Dusk-8B-TIES",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Winter_Dusk-8B-TIES"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7152610628687439
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4951882158967103
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07175226586102719
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3688229166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3478224734042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter_Night-8B-Model_Stock/49d98c73-75d8-4629-8cc2-a03592b0f551.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter_Night-8B-Model_Stock/49d98c73-75d8-4629-8cc2-a03592b0f551.json
deleted file mode 100644
index 700ce6efc1870184a934029c494cad05353aa5c2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Winter_Night-8B-Model_Stock/49d98c73-75d8-4629-8cc2-a03592b0f551.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Winter_Night-8B-Model_Stock/1762652579.587023",
- "retrieved_timestamp": "1762652579.587024",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Winter_Night-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Winter_Night-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7040452665593957
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5184968441488284
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14577039274924472
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3914270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3666057180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Yafune-8B-Model_Stock/edaf2deb-16a3-4109-84e0-e65498e09d1f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Yafune-8B-Model_Stock/edaf2deb-16a3-4109-84e0-e65498e09d1f.json
deleted file mode 100644
index 5d6cc5ca49019fb6236d4edb0d220c29d1181499..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Yafune-8B-Model_Stock/edaf2deb-16a3-4109-84e0-e65498e09d1f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Yafune-8B-Model_Stock/1762652579.587391",
- "retrieved_timestamp": "1762652579.587392",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Yafune-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Yafune-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7533045652202822
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5466719512941253
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1661631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3271812080536913
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41728125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38505651595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Yearn_V3-8B-Model_Stock/763eec85-4395-43b6-aa79-9ecb024eb7af.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Yearn_V3-8B-Model_Stock/763eec85-4395-43b6-aa79-9ecb024eb7af.json
deleted file mode 100644
index c6c7ea1264ee991401f988dfbb22af6c542c3c3e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Yearn_V3-8B-Model_Stock/763eec85-4395-43b6-aa79-9ecb024eb7af.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Yearn_V3-8B-Model_Stock/1762652579.587668",
- "retrieved_timestamp": "1762652579.587669",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Yearn_V3-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Yearn_V3-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7289746760816855
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5322019394938072
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18957703927492447
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3908958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3801529255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ZEUS-8B-V17-Abliterated_ALT/538f74e4-2587-43d7-a3fb-7826f3995ad9.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ZEUS-8B-V17-Abliterated_ALT/538f74e4-2587-43d7-a3fb-7826f3995ad9.json
deleted file mode 100644
index 685131ff18f7e344e6eada95fdbfeb4c67435f29..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ZEUS-8B-V17-Abliterated_ALT/538f74e4-2587-43d7-a3fb-7826f3995ad9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_ZEUS-8B-V17-Abliterated_ALT/1762652579.587883",
- "retrieved_timestamp": "1762652579.587884",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/ZEUS-8B-V17-Abliterated_ALT",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/ZEUS-8B-V17-Abliterated_ALT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5511221337163171
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5231075970343642
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1903323262839879
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41492708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3890458776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Zelus-8B-Model_Stock/2a1d9c9c-b3e4-49d8-96cb-720e53184db6.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Zelus-8B-Model_Stock/2a1d9c9c-b3e4-49d8-96cb-720e53184db6.json
deleted file mode 100644
index 1f5bfd3df85ecaa541ec588f454440aa02375aa7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Zelus-8B-Model_Stock/2a1d9c9c-b3e4-49d8-96cb-720e53184db6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Zelus-8B-Model_Stock/1762652579.5881522",
- "retrieved_timestamp": "1762652579.5881522",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Zelus-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Zelus-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.778833495126265
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5307011398651839
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42140625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38414228723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Zelus_V2-8B-Model_Stock/b385729e-27f8-4bf2-b2c6-674504fcd75b.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Zelus_V2-8B-Model_Stock/b385729e-27f8-4bf2-b2c6-674504fcd75b.json
deleted file mode 100644
index 602bc81b0d87f267a6e64c256f37f98bd20520da..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_Zelus_V2-8B-Model_Stock/b385729e-27f8-4bf2-b2c6-674504fcd75b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_Zelus_V2-8B-Model_Stock/1762652579.588366",
- "retrieved_timestamp": "1762652579.5883808",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/Zelus_V2-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/Zelus_V2-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7898243327703826
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5344816839912676
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2054380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3960729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38331117021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_hakuchido-8B-MODEL_STOCK/a9d24835-302c-445b-b1fd-89d41e3e7878.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_hakuchido-8B-MODEL_STOCK/a9d24835-302c-445b-b1fd-89d41e3e7878.json
deleted file mode 100644
index 32b08e124bf24d877cdecfc9bc0da137e8f6219a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_hakuchido-8B-MODEL_STOCK/a9d24835-302c-445b-b1fd-89d41e3e7878.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_hakuchido-8B-MODEL_STOCK/1762652579.589018",
- "retrieved_timestamp": "1762652579.589018",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/hakuchido-8B-MODEL_STOCK",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/hakuchido-8B-MODEL_STOCK"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7375175645066203
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5398373390214104
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19486404833836857
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41746875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3781582446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ichor-8B-Model_Stock/b1b0d419-e025-488a-a367-6769edfdf8ff.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ichor-8B-Model_Stock/b1b0d419-e025-488a-a367-6769edfdf8ff.json
deleted file mode 100644
index ccef33494daebcca247b040642b81bd34746bbb7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ichor-8B-Model_Stock/b1b0d419-e025-488a-a367-6769edfdf8ff.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_ichor-8B-Model_Stock/1762652579.589237",
- "retrieved_timestamp": "1762652579.589238",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/ichor-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/ichor-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5386319410275846
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5084222037759372
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10876132930513595
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42121875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31507646276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ichor_1.1-8B-Model_Stock/64afccfe-af45-4c26-878a-eb01b56f3524.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ichor_1.1-8B-Model_Stock/64afccfe-af45-4c26-878a-eb01b56f3524.json
deleted file mode 100644
index 630eef705154af9433902582544b34053d6c1a75..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_ichor_1.1-8B-Model_Stock/64afccfe-af45-4c26-878a-eb01b56f3524.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_ichor_1.1-8B-Model_Stock/1762652579.589439",
- "retrieved_timestamp": "1762652579.589439",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/ichor_1.1-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/ichor_1.1-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8096328851890761
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.528067770617839
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17749244712990936
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4067708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3855551861702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_inexpertus-8B-Model_Stock/1f0112d0-46b4-4a2c-9ccc-4872ccbae7a5.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_inexpertus-8B-Model_Stock/1f0112d0-46b4-4a2c-9ccc-4872ccbae7a5.json
deleted file mode 100644
index 54ea922efe4e56fc104fb672124f420ce97a360b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_inexpertus-8B-Model_Stock/1f0112d0-46b4-4a2c-9ccc-4872ccbae7a5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_inexpertus-8B-Model_Stock/1762652579.589726",
- "retrieved_timestamp": "1762652579.589729",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/inexpertus-8B-Model_Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/inexpertus-8B-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7795327508787795
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5280190470468065
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17069486404833836
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41182291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3790724734042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_inexpertus_1.1-8B-LINEAR/86f45b60-19d1-41fa-8538-3d22ea28a98f.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_inexpertus_1.1-8B-LINEAR/86f45b60-19d1-41fa-8538-3d22ea28a98f.json
deleted file mode 100644
index e22bcbbe1f30c2cd213ba29152bd7d90d4a1e80a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_inexpertus_1.1-8B-LINEAR/86f45b60-19d1-41fa-8538-3d22ea28a98f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_inexpertus_1.1-8B-LINEAR/1762652579.59006",
- "retrieved_timestamp": "1762652579.590061",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/inexpertus_1.1-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/inexpertus_1.1-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7527050448365891
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5524638802167572
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1729607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41734374999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38272938829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_inexpertus_1.2-8B-LINEAR/c2465654-27c4-4cad-94fa-3b0bff1fd242.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_inexpertus_1.2-8B-LINEAR/c2465654-27c4-4cad-94fa-3b0bff1fd242.json
deleted file mode 100644
index 17134c2eb6fe45695d21091d8e3daeac68687964..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_inexpertus_1.2-8B-LINEAR/c2465654-27c4-4cad-94fa-3b0bff1fd242.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_inexpertus_1.2-8B-LINEAR/1762652579.590318",
- "retrieved_timestamp": "1762652579.5903192",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/inexpertus_1.2-8B-LINEAR",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/inexpertus_1.2-8B-LINEAR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7347947889377962
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5523440600721518
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15861027190332325
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41334374999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37882313829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_mergekit-nuslerp-nqzkedi/c1bff8a8-6159-4fe6-a9bd-846846d0e633.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_mergekit-nuslerp-nqzkedi/c1bff8a8-6159-4fe6-a9bd-846846d0e633.json
deleted file mode 100644
index 23b82570ec356478a22bec0c6a03b323b9d5574e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_mergekit-nuslerp-nqzkedi/c1bff8a8-6159-4fe6-a9bd-846846d0e633.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_mergekit-nuslerp-nqzkedi/1762652579.590566",
- "retrieved_timestamp": "1762652579.590566",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/mergekit-nuslerp-nqzkedi",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/mergekit-nuslerp-nqzkedi"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7764852812759035
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5361918366546249
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18806646525679757
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4224583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3918716755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_remember_to_breathe-8b-Model-Stock/76309e63-a135-45cf-9f06-b091215726d0.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_remember_to_breathe-8b-Model-Stock/76309e63-a135-45cf-9f06-b091215726d0.json
deleted file mode 100644
index ad964573051367800aa1186536b16a32bfb673f5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_remember_to_breathe-8b-Model-Stock/76309e63-a135-45cf-9f06-b091215726d0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_remember_to_breathe-8b-Model-Stock/1762652579.5907981",
- "retrieved_timestamp": "1762652579.590799",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/remember_to_breathe-8b-Model-Stock",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/remember_to_breathe-8b-Model-Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7104150321147887
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5411654435599922
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1487915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4144583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37608045212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_test/a4f14e1c-4c16-4fb8-9753-f05a6c5f2836.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_test/a4f14e1c-4c16-4fb8-9753-f05a6c5f2836.json
deleted file mode 100644
index ae885b201954a3daf2c2c512a4f2c43320f34a86..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_test/a4f14e1c-4c16-4fb8-9753-f05a6c5f2836.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_test/1762652579.5910451",
- "retrieved_timestamp": "1762652579.5910459",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/test",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/test"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49369450834895856
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5371873804638203
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1933534743202417
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4350833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3646941489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_test_ALT/1ca8f31a-4df9-4eb5-8ded-506d80246cdd.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_test_ALT/1ca8f31a-4df9-4eb5-8ded-506d80246cdd.json
deleted file mode 100644
index d57995380072bdc488e6d2423bab0521e5b21c14..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_test_ALT/1ca8f31a-4df9-4eb5-8ded-506d80246cdd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_test_ALT/1762652579.591327",
- "retrieved_timestamp": "1762652579.591328",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/test_ALT",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/test_ALT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.499689712185889
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5370433315307738
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17069486404833836
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4362916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3492353723404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_tests_pending-do_not_use_yet/de113d87-7875-4f5c-89eb-48a59797b19b.json b/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_tests_pending-do_not_use_yet/de113d87-7875-4f5c-89eb-48a59797b19b.json
deleted file mode 100644
index ca04950268808ba8f2cbb427f6c9a15b6f40593e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/DreadPoor/DreadPoor_tests_pending-do_not_use_yet/de113d87-7875-4f5c-89eb-48a59797b19b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DreadPoor_tests_pending-do_not_use_yet/1762652579.591608",
- "retrieved_timestamp": "1762652579.591609",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DreadPoor/tests_pending-do_not_use_yet",
- "developer": "DreadPoor",
- "inference_platform": "unknown",
- "id": "DreadPoor/tests_pending-do_not_use_yet"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7691414336183549
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5407897873885027
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19788519637462235
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40047916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38272938829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ECE-ILAB-PRYMMAL/ECE-ILAB-PRYMMAL_ILAB-Merging-3B-V2/cbdf2130-1b6a-43ae-a503-4fc7acf14a76.json b/leaderboard_data/HFOpenLLMv2/ECE-ILAB-PRYMMAL/ECE-ILAB-PRYMMAL_ILAB-Merging-3B-V2/cbdf2130-1b6a-43ae-a503-4fc7acf14a76.json
deleted file mode 100644
index 356f857f52e678ca0e3de02c27a1a59709870d40..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ECE-ILAB-PRYMMAL/ECE-ILAB-PRYMMAL_ILAB-Merging-3B-V2/cbdf2130-1b6a-43ae-a503-4fc7acf14a76.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ECE-ILAB-PRYMMAL_ILAB-Merging-3B-V2/1762652579.5918348",
- "retrieved_timestamp": "1762652579.591836",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2",
- "developer": "ECE-ILAB-PRYMMAL",
- "inference_platform": "unknown",
- "id": "ECE-ILAB-PRYMMAL/ILAB-Merging-3B-V2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40289432040319684
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5401935891431586
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15181268882175228
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43321875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38605385638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 3.821
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Edgerunners/Edgerunners_meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/1e2cd0e7-ce74-4eac-86fb-64412d1d2094.json b/leaderboard_data/HFOpenLLMv2/Edgerunners/Edgerunners_meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/1e2cd0e7-ce74-4eac-86fb-64412d1d2094.json
deleted file mode 100644
index 335566455cc3cbdf2826b811f17b60cd85f39060..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Edgerunners/Edgerunners_meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/1e2cd0e7-ce74-4eac-86fb-64412d1d2094.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Edgerunners_meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16/1762652579.592541",
- "retrieved_timestamp": "1762652579.592542",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16",
- "developer": "Edgerunners",
- "inference_platform": "unknown",
- "id": "Edgerunners/meta-llama-3-8b-instruct-hf-ortho-baukit-34fail-3000total-bf16"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7147114101694614
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4979908369885237
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09063444108761329
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33415625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36361369680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-1.4b/e268be37-589d-41f2-af98-a85bb412eb44.json b/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-1.4b/e268be37-589d-41f2-af98-a85bb412eb44.json
deleted file mode 100644
index 88531211e10c4a7ee09b9562d8a95051489490af..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-1.4b/e268be37-589d-41f2-af98-a85bb412eb44.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-1.4b/1762652579.593903",
- "retrieved_timestamp": "1762652579.593904",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EleutherAI/pythia-1.4b",
- "developer": "EleutherAI",
- "inference_platform": "unknown",
- "id": "EleutherAI/pythia-1.4b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23708094522533543
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.315042649740714
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015105740181268883
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35378125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11228390957446809
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GPTNeoXForCausalLM",
- "params_billions": 1.515
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-12b/4df16bb2-996f-473f-9096-a8a8e152ca9b.json b/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-12b/4df16bb2-996f-473f-9096-a8a8e152ca9b.json
deleted file mode 100644
index 8f888f322b651bb19d749df08f4e09ca2f2153be..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-12b/4df16bb2-996f-473f-9096-a8a8e152ca9b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-12b/1762652579.5942001",
- "retrieved_timestamp": "1762652579.594201",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EleutherAI/pythia-12b",
- "developer": "EleutherAI",
- "inference_platform": "unknown",
- "id": "EleutherAI/pythia-12b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24714756845170813
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179653957935337
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01661631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24664429530201343
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3646979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11087101063829788
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GPTNeoXForCausalLM",
- "params_billions": 12.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-160m/d59ad4b0-e58e-48d6-90eb-93398c46251a.json b/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-160m/d59ad4b0-e58e-48d6-90eb-93398c46251a.json
deleted file mode 100644
index 41f5ef5a30236e493c972e58c94722bcfa2bb4be..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-160m/d59ad4b0-e58e-48d6-90eb-93398c46251a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-160m/1762652579.5944068",
- "retrieved_timestamp": "1762652579.594408",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EleutherAI/pythia-160m",
- "developer": "EleutherAI",
- "inference_platform": "unknown",
- "id": "EleutherAI/pythia-160m"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18155161637787737
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2970437484241321
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.00906344410876133
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4179375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11195146276595745
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GPTNeoXForCausalLM",
- "params_billions": 0.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-1b/a21cc55c-e9df-46ef-beed-b67a1750ddb7.json b/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-1b/a21cc55c-e9df-46ef-beed-b67a1750ddb7.json
deleted file mode 100644
index be335ed366caa17a157428b38fd7c84a6e4b3d46..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-1b/a21cc55c-e9df-46ef-beed-b67a1750ddb7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-1b/1762652579.594618",
- "retrieved_timestamp": "1762652579.594618",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EleutherAI/pythia-1b",
- "developer": "EleutherAI",
- "inference_platform": "unknown",
- "id": "EleutherAI/pythia-1b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2207941594968018
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3004093017564394
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.00906344410876133
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25671140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35520833333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11361369680851063
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GPTNeoXForCausalLM",
- "params_billions": 1.079
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-2.8b/0afcbde6-b822-4264-8733-bc255ea73314.json b/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-2.8b/0afcbde6-b822-4264-8733-bc255ea73314.json
deleted file mode 100644
index 4d8b20a0c24dc66a27ded7975ce0f8af020cc64f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-2.8b/0afcbde6-b822-4264-8733-bc255ea73314.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-2.8b/1762652579.594833",
- "retrieved_timestamp": "1762652579.5948339",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EleutherAI/pythia-2.8b",
- "developer": "EleutherAI",
- "inference_platform": "unknown",
- "id": "EleutherAI/pythia-2.8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21732226049105263
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3224085936276087
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.013595166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3485729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11369680851063829
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GPTNeoXForCausalLM",
- "params_billions": 2.909
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-410m/c9db5f06-9aac-4678-bfe0-65773ece4558.json b/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-410m/c9db5f06-9aac-4678-bfe0-65773ece4558.json
deleted file mode 100644
index 6699e12e9a4c9aaa6ad66bc04138b2cc6825aac6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-410m/c9db5f06-9aac-4678-bfe0-65773ece4558.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-410m/1762652579.5950441",
- "retrieved_timestamp": "1762652579.595045",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EleutherAI/pythia-410m",
- "developer": "EleutherAI",
- "inference_platform": "unknown",
- "id": "EleutherAI/pythia-410m"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21954525104500505
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.302813387064426
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.009818731117824773
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35781250000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11278257978723404
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GPTNeoXForCausalLM",
- "params_billions": 0.506
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-6.9b/6ae207e3-2596-4b28-b058-d47d07465192.json b/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-6.9b/6ae207e3-2596-4b28-b058-d47d07465192.json
deleted file mode 100644
index 8acb3bcdcf04483476cd4130dd17f4802a1e08f6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EleutherAI/EleutherAI_pythia-6.9b/6ae207e3-2596-4b28-b058-d47d07465192.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EleutherAI_pythia-6.9b/1762652579.595358",
- "retrieved_timestamp": "1762652579.595359",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EleutherAI/pythia-6.9b",
- "developer": "EleutherAI",
- "inference_platform": "unknown",
- "id": "EleutherAI/pythia-6.9b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22811362739752744
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3232287869322383
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.014350453172205438
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2516778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3590520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1146941489361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GPTNeoXForCausalLM",
- "params_billions": 6.9
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EnnoAi/EnnoAi_EnnoAi-7B-French-Instruct-202502/75939d35-c0ca-4256-b667-fe6042ca5979.json b/leaderboard_data/HFOpenLLMv2/EnnoAi/EnnoAi_EnnoAi-7B-French-Instruct-202502/75939d35-c0ca-4256-b667-fe6042ca5979.json
deleted file mode 100644
index fedade8dc97a08f961367e05d5e51140b44a40e7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EnnoAi/EnnoAi_EnnoAi-7B-French-Instruct-202502/75939d35-c0ca-4256-b667-fe6042ca5979.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EnnoAi_EnnoAi-7B-French-Instruct-202502/1762652579.596549",
- "retrieved_timestamp": "1762652579.59655",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EnnoAi/EnnoAi-7B-French-Instruct-202502",
- "developer": "EnnoAi",
- "inference_platform": "unknown",
- "id": "EnnoAi/EnnoAi-7B-French-Instruct-202502"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5564424615575562
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5574545199388612
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3723564954682779
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45997916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4013464095744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 7.456
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_Azure_Dusk-v0.2/79790560-846a-48fb-b37a-462162eb0e97.json b/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_Azure_Dusk-v0.2/79790560-846a-48fb-b37a-462162eb0e97.json
deleted file mode 100644
index bc455d7884f2115848fa87a1405d67a24d48341a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_Azure_Dusk-v0.2/79790560-846a-48fb-b37a-462162eb0e97.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Epiculous_Azure_Dusk-v0.2/1762652579.5970619",
- "retrieved_timestamp": "1762652579.5970628",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Epiculous/Azure_Dusk-v0.2",
- "developer": "Epiculous",
- "inference_platform": "unknown",
- "id": "Epiculous/Azure_Dusk-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.346715603487635
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4119721873553597
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02945619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3834583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3034408244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_Crimson_Dawn-v0.2/91b7917e-a908-4281-9a4d-a2c1e7558105.json b/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_Crimson_Dawn-v0.2/91b7917e-a908-4281-9a4d-a2c1e7558105.json
deleted file mode 100644
index b65e8093479c366f6c448e111e2a6446c2523a47..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_Crimson_Dawn-v0.2/91b7917e-a908-4281-9a4d-a2c1e7558105.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Epiculous_Crimson_Dawn-v0.2/1762652579.5973198",
- "retrieved_timestamp": "1762652579.5973198",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Epiculous/Crimson_Dawn-v0.2",
- "developer": "Epiculous",
- "inference_platform": "unknown",
- "id": "Epiculous/Crimson_Dawn-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3103454389907667
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44823796489645434
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04305135951661632
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4151770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27210771276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_NovaSpark/9270e697-84b1-46c5-afcc-481065f2be8f.json b/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_NovaSpark/9270e697-84b1-46c5-afcc-481065f2be8f.json
deleted file mode 100644
index d68250ff8d0f40e8e7073b67ea11b0d6f53b1467..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_NovaSpark/9270e697-84b1-46c5-afcc-481065f2be8f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Epiculous_NovaSpark/1762652579.597535",
- "retrieved_timestamp": "1762652579.597536",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Epiculous/NovaSpark",
- "developer": "Epiculous",
- "inference_platform": "unknown",
- "id": "Epiculous/NovaSpark"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6408473960203371
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5063958663768304
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15181268882175228
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3881979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3648603723404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_Violet_Twilight-v0.2/83990950-a34c-463f-9a1a-d9371910da6f.json b/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_Violet_Twilight-v0.2/83990950-a34c-463f-9a1a-d9371910da6f.json
deleted file mode 100644
index f9ef64fdb02367ae3369b0148a88098d5c94b3ee..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Epiculous/Epiculous_Violet_Twilight-v0.2/83990950-a34c-463f-9a1a-d9371910da6f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Epiculous_Violet_Twilight-v0.2/1762652579.597749",
- "retrieved_timestamp": "1762652579.59775",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Epiculous/Violet_Twilight-v0.2",
- "developer": "Epiculous",
- "inference_platform": "unknown",
- "id": "Epiculous/Violet_Twilight-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45317756885064964
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4614552476845888
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02870090634441088
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42993750000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3110871010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_DeepPhi-3.5-mini-instruct/b367fb18-f302-41ec-a5f9-7d47766ca6f3.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_DeepPhi-3.5-mini-instruct/b367fb18-f302-41ec-a5f9-7d47766ca6f3.json
deleted file mode 100644
index d54a01d07e9f42146b5c0e5fa37ee8aa06dfb1f4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_DeepPhi-3.5-mini-instruct/b367fb18-f302-41ec-a5f9-7d47766ca6f3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_DeepPhi-3.5-mini-instruct/1762652579.5991712",
- "retrieved_timestamp": "1762652579.599172",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/DeepPhi-3.5-mini-instruct",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/DeepPhi-3.5-mini-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1325915238234551
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28822860667627487
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.006797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2332214765100671
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36562500000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11028922872340426
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.821
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_FineLlama3.1-8B-Instruct/a99828d9-a521-4b46-bd81-e791fae7bcf8.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_FineLlama3.1-8B-Instruct/a99828d9-a521-4b46-bd81-e791fae7bcf8.json
deleted file mode 100644
index feac98594908fe1a50c8835a2e890f86358c8765..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_FineLlama3.1-8B-Instruct/a99828d9-a521-4b46-bd81-e791fae7bcf8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_FineLlama3.1-8B-Instruct/1762652579.5997",
- "retrieved_timestamp": "1762652579.599701",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/FineLlama3.1-8B-Instruct",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/FineLlama3.1-8B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08000992921005155
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45573635384163325
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03474320241691843
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3481666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3112533244680851
- }
- }
- ],
- "additional_details": {
- "precision": "4bit",
- "architecture": "?",
- "params_billions": 14.483
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-12B/bdb69cfa-cce7-4813-babb-b6f987be90de.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-12B/bdb69cfa-cce7-4813-babb-b6f987be90de.json
deleted file mode 100644
index a3bec4c83b875225f98b048992df67f4f5bbc606..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-12B/bdb69cfa-cce7-4813-babb-b6f987be90de.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-12B/1762652579.59992",
- "retrieved_timestamp": "1762652579.59992",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Fireball-12B",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Fireball-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1833501775289565
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5110893652548262
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04078549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42363541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3343583776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/627a984d-8a4b-4a10-ac9e-05ccdbcc1835.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/627a984d-8a4b-4a10-ac9e-05ccdbcc1835.json
deleted file mode 100644
index d7c6ece5eb912c1bff93e7a8a68554669a9f13cd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/627a984d-8a4b-4a10-ac9e-05ccdbcc1835.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200/1762652579.600397",
- "retrieved_timestamp": "1762652579.600397",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Fireball-Alpaca-Llama-3.1-8B-Philos-DPO-200"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4577243934981405
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4838398624677178
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12311178247734139
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39445833333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35829454787234044
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/b8b22223-7ef6-4fec-9928-68de2ce516e6.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/b8b22223-7ef6-4fec-9928-68de2ce516e6.json
deleted file mode 100644
index d680108c8db153aec975068fb3f27918b6f7d2e5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/b8b22223-7ef6-4fec-9928-68de2ce516e6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto/1762652579.601048",
- "retrieved_timestamp": "1762652579.6010492",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-0.001-128K-auto"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44318630123627534
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4823644760491404
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13293051359516617
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4066458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3515625
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/7268e623-7dc3-4a79-b410-3f2efdbb6b1b.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/7268e623-7dc3-4a79-b410-3f2efdbb6b1b.json
deleted file mode 100644
index 35487c5e8d6aabc2a9b94e3c6154649bdfe3e14c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/7268e623-7dc3-4a79-b410-3f2efdbb6b1b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/1762652579.6022642",
- "retrieved_timestamp": "1762652579.6022651",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7207066140063919
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4610092915501656
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13141993957703926
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3432395833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3353557180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/ba8d6727-fe89-4bab-95a2-5f70d77034dc.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/ba8d6727-fe89-4bab-95a2-5f70d77034dc.json
deleted file mode 100644
index efd0ead2d0a647115c40e7d1c891e7f94f156b61..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/ba8d6727-fe89-4bab-95a2-5f70d77034dc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto/1762652579.601946",
- "retrieved_timestamp": "1762652579.6019468",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7304984108831234
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46492466713692354
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13972809667673716
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32088541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34798869680851063
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/1ad587be-8544-4c37-bb8c-e21ad685039c.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/1ad587be-8544-4c37-bb8c-e21ad685039c.json
deleted file mode 100644
index 89ea0a504ecd8e38f0539c834b2eb6a90da79701..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/1ad587be-8544-4c37-bb8c-e21ad685039c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds/1762652579.60172",
- "retrieved_timestamp": "1762652579.601721",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.669099101495144
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4668070143164938
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1336858006042296
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34178125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33892952127659576
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/5f40e687-560e-4846-bbc1-4c2300680d4b.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/5f40e687-560e-4846-bbc1-4c2300680d4b.json
deleted file mode 100644
index 9f28c1467593e4c17039d298b45c63823996b629..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/5f40e687-560e-4846-bbc1-4c2300680d4b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code/1762652579.601493",
- "retrieved_timestamp": "1762652579.601493",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5975334335119704
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4904191122627008
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1336858006042296
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40103125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34225398936170215
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/839b6ee8-2f25-4b53-abec-a0a9dd198f04.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/839b6ee8-2f25-4b53-abec-a0a9dd198f04.json
deleted file mode 100644
index 6d128b3261b15738032149a5b2107814e2823151..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/839b6ee8-2f25-4b53-abec-a0a9dd198f04.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K/1762652579.6012669",
- "retrieved_timestamp": "1762652579.601268",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4457339858242796
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48973199216860547
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37622916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3543051861702128
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/6f29d957-8b65-4ee7-96dd-da2477023403.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/6f29d957-8b65-4ee7-96dd-da2477023403.json
deleted file mode 100644
index 5213420b82165d0ed9a8f8297e7c9744a06f62e5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/6f29d957-8b65-4ee7-96dd-da2477023403.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT/1762652579.6025012",
- "retrieved_timestamp": "1762652579.6025019",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-COT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4578241288669619
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4760520079608936
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13821752265861026
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3881354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3470744680851064
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/c39007d8-b4b8-485a-88af-39d18a6007c3.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/c39007d8-b4b8-485a-88af-39d18a6007c3.json
deleted file mode 100644
index a9a5f3042e39bde250f5cb243b7ee99fbd1544a0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/c39007d8-b4b8-485a-88af-39d18a6007c3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto/1762652579.602742",
- "retrieved_timestamp": "1762652579.6027431",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.004-128K-code-ds-auto"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7204816553411615
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4817795525811035
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14350453172205438
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2483221476510067
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35480385638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Math/506bb9ca-e322-4ee3-b2d6-96e334a99473.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Math/506bb9ca-e322-4ee3-b2d6-96e334a99473.json
deleted file mode 100644
index bc1dc68fea01f82a9cc7c086ba5db9d0e474881f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Math/506bb9ca-e322-4ee3-b2d6-96e334a99473.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.1-8B-Instruct-Math/1762652579.602981",
- "retrieved_timestamp": "1762652579.6029818",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Math"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46229559790245434
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49829504320793055
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10800604229607251
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3640729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33311170212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/e351aba3-7a05-400b-abbf-d09c1fe333e3.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/e351aba3-7a05-400b-abbf-d09c1fe333e3.json
deleted file mode 100644
index 67c67047e4659a09c8a09038753f2ef8e32ecfb6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/e351aba3-7a05-400b-abbf-d09c1fe333e3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO/1762652579.60321",
- "retrieved_timestamp": "1762652579.603211",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Fireball-Meta-Llama-3.2-8B-Instruct-agent-003-128k-code-DPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46109655713506825
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48010141537970213
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12537764350453173
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3998229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35206117021276595
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Mistral-Nemo-Base-2407-v1-DPO2/6a0cc28d-d7bc-454d-ab7c-93c823256f30.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Mistral-Nemo-Base-2407-v1-DPO2/6a0cc28d-d7bc-454d-ab7c-93c823256f30.json
deleted file mode 100644
index d26513a5e5aafecf835774796a0a262a52978150..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Fireball-Mistral-Nemo-Base-2407-v1-DPO2/6a0cc28d-d7bc-454d-ab7c-93c823256f30.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Fireball-Mistral-Nemo-Base-2407-v1-DPO2/1762652579.603439",
- "retrieved_timestamp": "1762652579.60344",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Fireball-Mistral-Nemo-Base-2407-v1-DPO2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18607295309778055
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49677687590350894
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03625377643504532
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4040104166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33527260638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Mistral-Nemo-Instruct-12B-Philosophy-Math/ee2ab45a-4a93-4942-8510-aef93b39b7e3.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Mistral-Nemo-Instruct-12B-Philosophy-Math/ee2ab45a-4a93-4942-8510-aef93b39b7e3.json
deleted file mode 100644
index 4e763c824945e708f9b80d7d412a264c55b547bc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Mistral-Nemo-Instruct-12B-Philosophy-Math/ee2ab45a-4a93-4942-8510-aef93b39b7e3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Mistral-Nemo-Instruct-12B-Philosophy-Math/1762652579.6045282",
- "retrieved_timestamp": "1762652579.604529",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Mistral-Nemo-Instruct-12B-Philosophy-Math"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06946790072563022
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5364928342081372
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09592145015105741
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42921875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32962101063829785
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/644cdea0-49f2-43b9-b94d-55d31c0e0d54.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/644cdea0-49f2-43b9-b94d-55d31c0e0d54.json
deleted file mode 100644
index 00987b14c3a0a9db319e5d86f319d6d7eaba78b5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/644cdea0-49f2-43b9-b94d-55d31c0e0d54.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy/1762652579.6049678",
- "retrieved_timestamp": "1762652579.6049678",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Empathy"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7100903380807368
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46279874531423665
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13972809667673716
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27684563758389263
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3194895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33111702127659576
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/e2422bfe-8569-4181-8ec1-955086bbb8bb.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/e2422bfe-8569-4181-8ec1-955086bbb8bb.json
deleted file mode 100644
index 018d579060839a24c9ec6f95bb00f80cdd62cb21..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/e2422bfe-8569-4181-8ec1-955086bbb8bb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic/1762652579.605414",
- "retrieved_timestamp": "1762652579.6054149",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-Logic"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.712213593265868
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45659361690861294
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12462235649546828
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32348958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33502327127659576
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/98c2fc89-acc4-4740-9d24-c9e9c2cd9ad7.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/98c2fc89-acc4-4740-9d24-c9e9c2cd9ad7.json
deleted file mode 100644
index 9e0d4955a4a6e46d18bc96475e79cc8ab0c15f32..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/98c2fc89-acc4-4740-9d24-c9e9c2cd9ad7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent/1762652579.605665",
- "retrieved_timestamp": "1762652579.6056662",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Polypsyche-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-ds-auto-divergent"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6915306941138402
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4524732961901791
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12915407854984895
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35775
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32903922872340424
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.2/3e1fd9a0-a037-4278-baaa-b444d3723557.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.2/3e1fd9a0-a037-4278-baaa-b444d3723557.json
deleted file mode 100644
index ed8de8fc9f322f9b6ffa1d7d78cbc917b9b0ff47..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.2/3e1fd9a0-a037-4278-baaa-b444d3723557.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.2/1762652579.606377",
- "retrieved_timestamp": "1762652579.606377",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40871443325930756
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3324495305251265
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05060422960725076
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3221875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11785239361702128
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.236
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.3/9c141030-9c3f-4e80-8b97-9297f3d81df6.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.3/9c141030-9c3f-4e80-8b97-9297f3d81df6.json
deleted file mode 100644
index 5b768a6f9228f04ba8b86cb108496fef890795b8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.3/9c141030-9c3f-4e80-8b97-9297f3d81df6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-1B-Instruct-v1.3/1762652579.606596",
- "retrieved_timestamp": "1762652579.6065972",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Reasoning-Llama-3.2-1B-Instruct-v1.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3272816127874041
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3262818751942827
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05060422960725076
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.326
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11727061170212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.236
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/d09af70f-bb55-40e8-88f2-a78f20c90b8e.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/d09af70f-bb55-40e8-88f2-a78f20c90b8e.json
deleted file mode 100644
index a22033ab2aaa066273f47ac589c1e6efddaf82fd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/d09af70f-bb55-40e8-88f2-a78f20c90b8e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO/1762652579.6070201",
- "retrieved_timestamp": "1762652579.607021",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1-ORPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7289746760816855
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45181862491313
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15332326283987915
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3486666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3100066489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1/099d3be6-bd40-416f-90a1-582f66049c54.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1/099d3be6-bd40-416f-90a1-582f66049c54.json
deleted file mode 100644
index 19c3ea30d9fad81d1644f53df7496667a37a08de..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1/099d3be6-bd40-416f-90a1-582f66049c54.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Reasoning-Llama-3.2-3B-Math-Instruct-RE1/1762652579.606812",
- "retrieved_timestamp": "1762652579.606813",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Reasoning-Llama-3.2-3B-Math-Instruct-RE1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5119538380386264
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43810846923178864
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10800604229607251
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34352083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2789228723404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/03d616a2-9a52-4014-8ecf-94dc93a5b4d2.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/03d616a2-9a52-4014-8ecf-94dc93a5b4d2.json
deleted file mode 100644
index 088a1f9c883cda82eebc803fb119067e5e0b1d08..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/03d616a2-9a52-4014-8ecf-94dc93a5b4d2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math/1762652579.60724",
- "retrieved_timestamp": "1762652579.607241",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/ReasoningCore-1.0-3B-Instruct-r01-Reflect-Math"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5902893212232432
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.436379591348482
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14803625377643503
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3314270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28233045212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-0/9835468b-c049-4562-8633-864d29c7bb75.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-0/9835468b-c049-4562-8633-864d29c7bb75.json
deleted file mode 100644
index 7647636ddc98d1ff8da7159306da626dee974b3b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-0/9835468b-c049-4562-8633-864d29c7bb75.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-0/1762652579.60745",
- "retrieved_timestamp": "1762652579.60745",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/ReasoningCore-3B-0",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/ReasoningCore-3B-0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7341454008696924
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44460707451155984
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15861027190332325
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35539583333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3172373670212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-Instruct-r01-Reflect/b3efb02e-5312-48cf-b9e9-e90d3d5d9a7d.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-Instruct-r01-Reflect/b3efb02e-5312-48cf-b9e9-e90d3d5d9a7d.json
deleted file mode 100644
index cf9aa82c71bfa3658f9500ed70b3cb21b2df95e0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-Instruct-r01-Reflect/b3efb02e-5312-48cf-b9e9-e90d3d5d9a7d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-Instruct-r01-Reflect/1762652579.607657",
- "retrieved_timestamp": "1762652579.607658",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/ReasoningCore-3B-Instruct-r01-Reflect"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7334960128015887
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44496323889512146
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1540785498489426
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3527291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31441156914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-R01/5b06f64a-5c31-457e-a414-00e35888a6b2.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-R01/5b06f64a-5c31-457e-a414-00e35888a6b2.json
deleted file mode 100644
index 7831b3f28c12f8fa7a7a85e8c64af9abbd23218e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-R01/5b06f64a-5c31-457e-a414-00e35888a6b2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-R01/1762652579.607871",
- "retrieved_timestamp": "1762652579.607872",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/ReasoningCore-3B-R01",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/ReasoningCore-3B-R01"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29760590787998065
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43725189001258497
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1299093655589124
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31945833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25914228723404253
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2/83b3c488-c210-4ce7-8f7f-75d0d04d5b02.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2/83b3c488-c210-4ce7-8f7f-75d0d04d5b02.json
deleted file mode 100644
index 7c98fbfb0a2ecb9d9754d4ecb2357e228ccec496..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2/83b3c488-c210-4ce7-8f7f-75d0d04d5b02.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2/1762652579.6080902",
- "retrieved_timestamp": "1762652579.6080909",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/ReasoningCore-3B-RE1-V2",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/ReasoningCore-3B-RE1-V2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7393161256576994
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44623884450165807
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15634441087613293
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3540625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31806848404255317
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2A/512a09c1-6c1c-4120-a659-91809607393a.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2A/512a09c1-6c1c-4120-a659-91809607393a.json
deleted file mode 100644
index 2345c24c0a154a6465f538a0efbefc911c14687b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2A/512a09c1-6c1c-4120-a659-91809607393a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2A/1762652579.608308",
- "retrieved_timestamp": "1762652579.608309",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/ReasoningCore-3B-RE1-V2A",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/ReasoningCore-3B-RE1-V2A"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5732534120577845
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4189899823502799
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09290030211480363
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33520833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2736037234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2B/f92ef151-aa21-4240-8de6-1ff04bec55d9.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2B/f92ef151-aa21-4240-8de6-1ff04bec55d9.json
deleted file mode 100644
index 2996bac5b1ab19e0b2d7434683bb0e5d2ba1142d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2B/f92ef151-aa21-4240-8de6-1ff04bec55d9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2B/1762652579.60862",
- "retrieved_timestamp": "1762652579.6086211",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/ReasoningCore-3B-RE1-V2B",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/ReasoningCore-3B-RE1-V2B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5051097753959495
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41678877951897175
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10725075528700906
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3448229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26728723404255317
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2C/88cb3df4-7cbb-440a-87d4-9b2a89f3572c.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2C/88cb3df4-7cbb-440a-87d4-9b2a89f3572c.json
deleted file mode 100644
index 7ed9cce0dae8a29366420e1714000cd52ef66d9c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-RE1-V2C/88cb3df4-7cbb-440a-87d4-9b2a89f3572c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-RE1-V2C/1762652579.608856",
- "retrieved_timestamp": "1762652579.6088572",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/ReasoningCore-3B-RE1-V2C",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/ReasoningCore-3B-RE1-V2C"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5057092957796425
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41774567831526244
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09743202416918428
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34215625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2691156914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-T1-V1/ec3846e6-d111-4c77-93fb-8d1d8106271a.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-T1-V1/ec3846e6-d111-4c77-93fb-8d1d8106271a.json
deleted file mode 100644
index 9bdff3b2250e470fa76e0bd3cfe77b6bf8ba906d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-T1-V1/ec3846e6-d111-4c77-93fb-8d1d8106271a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-T1-V1/1762652579.609117",
- "retrieved_timestamp": "1762652579.609117",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/ReasoningCore-3B-T1-V1",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/ReasoningCore-3B-T1-V1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7207564816908026
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4516908992961786
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14577039274924472
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35403125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31200132978723405
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-T1_1/ce5a0509-e68c-40f4-8b7b-c56ba90c0e10.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-T1_1/ce5a0509-e68c-40f4-8b7b-c56ba90c0e10.json
deleted file mode 100644
index c302de06d7bbf27fc4cd3c6e5098d8fef4c1012e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI/EpistemeAI_ReasoningCore-3B-T1_1/ce5a0509-e68c-40f4-8b7b-c56ba90c0e10.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_ReasoningCore-3B-T1_1/1762652579.609335",
- "retrieved_timestamp": "1762652579.6093361",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/ReasoningCore-3B-T1_1",
- "developer": "EpistemeAI",
- "inference_platform": "unknown",
- "id": "EpistemeAI/ReasoningCore-3B-T1_1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7274509412802475
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45239424517060806
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1540785498489426
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3553645833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3116688829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-12B-v1.2/de86ca37-ffcb-41df-a0d1-68cb545ec1de.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-12B-v1.2/de86ca37-ffcb-41df-a0d1-68cb545ec1de.json
deleted file mode 100644
index ea71ead47e8751ffc408b696958fc775b743ad22..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-12B-v1.2/de86ca37-ffcb-41df-a0d1-68cb545ec1de.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-12B-v1.2/1762652579.609813",
- "retrieved_timestamp": "1762652579.609814",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI2/Fireball-12B-v1.2",
- "developer": "EpistemeAI2",
- "inference_platform": "unknown",
- "id": "EpistemeAI2/Fireball-12B-v1.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13553925805750963
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5018583230653281
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04154078549848943
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4173125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33369348404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/7e03e547-5324-4c5d-b364-413014fad7eb.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/7e03e547-5324-4c5d-b364-413014fad7eb.json
deleted file mode 100644
index b4930a65b734bfe9e8a1f075720f1cd9f0f0bd75..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/7e03e547-5324-4c5d-b364-413014fad7eb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo/1762652579.610973",
- "retrieved_timestamp": "1762652579.6109738",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo",
- "developer": "EpistemeAI2",
- "inference_platform": "unknown",
- "id": "EpistemeAI2/Fireball-Alpaca-Llama3.1.06-8B-Philos-dpo"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4865756193566404
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48807730539009225
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13066465256797583
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3931875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3614527925531915
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/0115907a-a473-4f12-8f0b-5dafd729fc44.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/0115907a-a473-4f12-8f0b-5dafd729fc44.json
deleted file mode 100644
index c149180bcb9e8e789a16738b82e612fea5612f47..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/0115907a-a473-4f12-8f0b-5dafd729fc44.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math/1762652579.61236",
- "retrieved_timestamp": "1762652579.612361",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math",
- "developer": "EpistemeAI2",
- "inference_platform": "unknown",
- "id": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003-128K-code-math"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5515465631191904
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48075580310342053
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1351963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36925
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3420046542553192
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/63b6d34d-1a59-40b6-b663-1d81544867f2.json b/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/63b6d34d-1a59-40b6-b663-1d81544867f2.json
deleted file mode 100644
index 0cc466f16b93e83964519f8d42006821f557ba33..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/EpistemeAI2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/63b6d34d-1a59-40b6-b663-1d81544867f2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI2_Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT/1762652579.6125782",
- "retrieved_timestamp": "1762652579.612579",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT",
- "developer": "EpistemeAI2",
- "inference_platform": "unknown",
- "id": "EpistemeAI2/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.005-128K-code-COT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4633195476890207
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4790834283312441
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11706948640483383
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37743750000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3564660904255319
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Eric111/Eric111_CatunaMayo-DPO/ef63850d-6acf-4d04-ac01-7ac407bf3b89.json b/leaderboard_data/HFOpenLLMv2/Eric111/Eric111_CatunaMayo-DPO/ef63850d-6acf-4d04-ac01-7ac407bf3b89.json
deleted file mode 100644
index 1d0067649dbeb839c7fa1d94b094a4765118035b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Eric111/Eric111_CatunaMayo-DPO/ef63850d-6acf-4d04-ac01-7ac407bf3b89.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Eric111_CatunaMayo-DPO/1762652579.613287",
- "retrieved_timestamp": "1762652579.613288",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Eric111/CatunaMayo-DPO",
- "developer": "Eric111",
- "inference_platform": "unknown",
- "id": "Eric111/CatunaMayo-DPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4214539643700936
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5223991323844243
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08157099697885196
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44503125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3169880319148936
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Eric111/Eric111_CatunaMayo/9c2ab331-44f5-4306-a57c-5ddb0154ba63.json b/leaderboard_data/HFOpenLLMv2/Eric111/Eric111_CatunaMayo/9c2ab331-44f5-4306-a57c-5ddb0154ba63.json
deleted file mode 100644
index bc34dab4f5dc7962beff43d269051756f5f83f95..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Eric111/Eric111_CatunaMayo/9c2ab331-44f5-4306-a57c-5ddb0154ba63.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Eric111_CatunaMayo/1762652579.613048",
- "retrieved_timestamp": "1762652579.613049",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Eric111/CatunaMayo",
- "developer": "Eric111",
- "inference_platform": "unknown",
- "id": "Eric111/CatunaMayo"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4074156571231
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5243635518600797
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08459214501510574
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45398958333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3178191489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/80ff60c0-820c-425d-8b32-44fc61128c9f.json b/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/80ff60c0-820c-425d-8b32-44fc61128c9f.json
deleted file mode 100644
index 472ac84596958487ed417fff6206e46b207b2fe1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/80ff60c0-820c-425d-8b32-44fc61128c9f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties-v2/1762652579.613742",
- "retrieved_timestamp": "1762652579.613743",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2",
- "developer": "Etherll",
- "inference_platform": "unknown",
- "id": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37399322686028624
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5410649663618229
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16314199395770393
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4649375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39777260638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 3.821
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties/d3b94b8e-8612-4928-bdba-81226af143b2.json b/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties/d3b94b8e-8612-4928-bdba-81226af143b2.json
deleted file mode 100644
index 8064de8afb39da127dd2f2564c284b86180548e5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties/d3b94b8e-8612-4928-bdba-81226af143b2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Etherll_Chocolatine-3B-Instruct-DPO-Revised-Ties/1762652579.613493",
- "retrieved_timestamp": "1762652579.613494",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties",
- "developer": "Etherll",
- "inference_platform": "unknown",
- "id": "Etherll/Chocolatine-3B-Instruct-DPO-Revised-Ties"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3724694920588483
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5410649663618229
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16314199395770393
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4649375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39777260638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 3.821
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_Qwen2.5-Coder-7B-Instruct-Ties/ea9f32e5-431d-4573-9ac9-25ebfa9c2c9e.json b/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_Qwen2.5-Coder-7B-Instruct-Ties/ea9f32e5-431d-4573-9ac9-25ebfa9c2c9e.json
deleted file mode 100644
index a87095004387696d8a3908b63234defae811676e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_Qwen2.5-Coder-7B-Instruct-Ties/ea9f32e5-431d-4573-9ac9-25ebfa9c2c9e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Etherll_Qwen2.5-Coder-7B-Instruct-Ties/1762652579.61485",
- "retrieved_timestamp": "1762652579.614851",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Etherll/Qwen2.5-Coder-7B-Instruct-Ties",
- "developer": "Etherll",
- "inference_platform": "unknown",
- "id": "Etherll/Qwen2.5-Coder-7B-Instruct-Ties"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5005385709916355
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4895144464043051
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29154078549848944
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3296979865771812
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43728125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3503158244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_SuperHermes/a641d61c-aa42-4bce-afc0-ba7639f0a24e.json b/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_SuperHermes/a641d61c-aa42-4bce-afc0-ba7639f0a24e.json
deleted file mode 100644
index 7e1dc17091a6bd7833702ca60855425549f9aa4d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Etherll/Etherll_SuperHermes/a641d61c-aa42-4bce-afc0-ba7639f0a24e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Etherll_SuperHermes/1762652579.615286",
- "retrieved_timestamp": "1762652579.615287",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Etherll/SuperHermes",
- "developer": "Etherll",
- "inference_platform": "unknown",
- "id": "Etherll/SuperHermes"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5459015412438996
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5289531792679852
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16540785498489427
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44004166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39486369680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_Chocolatine-Fusion-14B/5d5a7561-8a41-48ea-ae1c-e986ac666f19.json b/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_Chocolatine-Fusion-14B/5d5a7561-8a41-48ea-ae1c-e986ac666f19.json
deleted file mode 100644
index 439d59adc85a764f01db3e474a4cbdcc88b6ee74..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_Chocolatine-Fusion-14B/5d5a7561-8a41-48ea-ae1c-e986ac666f19.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FINGU-AI_Chocolatine-Fusion-14B/1762652579.615752",
- "retrieved_timestamp": "1762652579.615752",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FINGU-AI/Chocolatine-Fusion-14B",
- "developer": "FINGU-AI",
- "inference_platform": "unknown",
- "id": "FINGU-AI/Chocolatine-Fusion-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6949028577507679
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.64132285324613
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3851963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3716442953020134
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49402083333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5261801861702128
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 8.367
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_L3-8B/f2a0c2ff-40a4-4a75-93ca-b611c4314dd5.json b/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_L3-8B/f2a0c2ff-40a4-4a75-93ca-b611c4314dd5.json
deleted file mode 100644
index 8f2b82f315fd5386fec635448b7184a4fd6529d2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_L3-8B/f2a0c2ff-40a4-4a75-93ca-b611c4314dd5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FINGU-AI_L3-8B/1762652579.615993",
- "retrieved_timestamp": "1762652579.615993",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FINGU-AI/L3-8B",
- "developer": "FINGU-AI",
- "inference_platform": "unknown",
- "id": "FINGU-AI/L3-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7517309627344335
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4985585187130108
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2545317220543807
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38283333333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36394614361702127
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_Q-Small-3B/11d9d5ea-29f2-412e-af48-858626ebeec5.json b/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_Q-Small-3B/11d9d5ea-29f2-412e-af48-858626ebeec5.json
deleted file mode 100644
index a78f891cc77e015dae1e004b251177f3d98169be..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_Q-Small-3B/11d9d5ea-29f2-412e-af48-858626ebeec5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FINGU-AI_Q-Small-3B/1762652579.616768",
- "retrieved_timestamp": "1762652579.61677",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FINGU-AI/Q-Small-3B",
- "developer": "FINGU-AI",
- "inference_platform": "unknown",
- "id": "FINGU-AI/Q-Small-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4145345461154182
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43185314557630744
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40054166666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27900598404255317
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_QwQ-Buddy-32B-Alpha/32836e5d-d413-4e40-8c9c-4cb8c3daa23a.json b/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_QwQ-Buddy-32B-Alpha/32836e5d-d413-4e40-8c9c-4cb8c3daa23a.json
deleted file mode 100644
index fe73df295f414c25f056ec7d67e7bd08e6422166..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_QwQ-Buddy-32B-Alpha/32836e5d-d413-4e40-8c9c-4cb8c3daa23a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FINGU-AI_QwQ-Buddy-32B-Alpha/1762652579.617035",
- "retrieved_timestamp": "1762652579.617036",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FINGU-AI/QwQ-Buddy-32B-Alpha",
- "developer": "FINGU-AI",
- "inference_platform": "unknown",
- "id": "FINGU-AI/QwQ-Buddy-32B-Alpha"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34464221598691475
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.642442234274039
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3851963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37919463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5059895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5294215425531915
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 19.662
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_RomboUltima-32B/65c5a05d-0b24-4767-88ff-24984fa0f988.json b/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_RomboUltima-32B/65c5a05d-0b24-4767-88ff-24984fa0f988.json
deleted file mode 100644
index 04017fd31edcd4baa875d28ac25180305ab169bc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_RomboUltima-32B/65c5a05d-0b24-4767-88ff-24984fa0f988.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FINGU-AI_RomboUltima-32B/1762652579.6173398",
- "retrieved_timestamp": "1762652579.617341",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FINGU-AI/RomboUltima-32B",
- "developer": "FINGU-AI",
- "inference_platform": "unknown",
- "id": "FINGU-AI/RomboUltima-32B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6671509372908327
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6938448333620042
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5385196374622356
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3716442953020134
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4836354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.578873005319149
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 17.645
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_Ultimos-32B/fa69d78a-e112-45ff-80c3-b4eb30d83ed9.json b/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_Ultimos-32B/fa69d78a-e112-45ff-80c3-b4eb30d83ed9.json
deleted file mode 100644
index c94d143a566d6f2ed4eb2b317b6d34b3e3e2a02f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FINGU-AI/FINGU-AI_Ultimos-32B/fa69d78a-e112-45ff-80c3-b4eb30d83ed9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FINGU-AI_Ultimos-32B/1762652579.617578",
- "retrieved_timestamp": "1762652579.617579",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FINGU-AI/Ultimos-32B",
- "developer": "FINGU-AI",
- "inference_platform": "unknown",
- "id": "FINGU-AI/Ultimos-32B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1592197591280026
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2905531373728777
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24916107382550334
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32860416666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11112034574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 9.604
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FallenMerick/FallenMerick_Chewy-Lemon-Cookie-11B/f4f2289c-5b3c-4040-9e34-ac20352f45d7.json b/leaderboard_data/HFOpenLLMv2/FallenMerick/FallenMerick_Chewy-Lemon-Cookie-11B/f4f2289c-5b3c-4040-9e34-ac20352f45d7.json
deleted file mode 100644
index 979995a7ca74f3dcf405cbb5534dface2614c77b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FallenMerick/FallenMerick_Chewy-Lemon-Cookie-11B/f4f2289c-5b3c-4040-9e34-ac20352f45d7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FallenMerick_Chewy-Lemon-Cookie-11B/1762652579.6178062",
- "retrieved_timestamp": "1762652579.6178071",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FallenMerick/Chewy-Lemon-Cookie-11B",
- "developer": "FallenMerick",
- "inference_platform": "unknown",
- "id": "FallenMerick/Chewy-Lemon-Cookie-11B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4875242135312083
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5251122307375103
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.054380664652567974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45455208333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3267121010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 10.732
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Felladrin/Felladrin_Llama-160M-Chat-v1/0885ef86-d7ef-4261-8ccd-f0391c42ffe4.json b/leaderboard_data/HFOpenLLMv2/Felladrin/Felladrin_Llama-160M-Chat-v1/0885ef86-d7ef-4261-8ccd-f0391c42ffe4.json
deleted file mode 100644
index ba4f2d907d94ced156e72daea85026c0b537d498..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Felladrin/Felladrin_Llama-160M-Chat-v1/0885ef86-d7ef-4261-8ccd-f0391c42ffe4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Felladrin_Llama-160M-Chat-v1/1762652579.618279",
- "retrieved_timestamp": "1762652579.61828",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Felladrin/Llama-160M-Chat-v1",
- "developer": "Felladrin",
- "inference_platform": "unknown",
- "id": "Felladrin/Llama-160M-Chat-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15754642127333254
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30360811146348365
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.006042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.366125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11361369680851063
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.162
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Felladrin/Felladrin_Minueza-32M-UltraChat/44324409-5cb3-438a-9751-9ee868b35233.json b/leaderboard_data/HFOpenLLMv2/Felladrin/Felladrin_Minueza-32M-UltraChat/44324409-5cb3-438a-9751-9ee868b35233.json
deleted file mode 100644
index 1c5a1d6c34e9991c08c4cae24dd66bb77606a563..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Felladrin/Felladrin_Minueza-32M-UltraChat/44324409-5cb3-438a-9751-9ee868b35233.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Felladrin_Minueza-32M-UltraChat/1762652579.6187",
- "retrieved_timestamp": "1762652579.6187022",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Felladrin/Minueza-32M-UltraChat",
- "developer": "Felladrin",
- "inference_platform": "unknown",
- "id": "Felladrin/Minueza-32M-UltraChat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13756277787381924
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2941478734048925
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.004531722054380665
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2558724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37418749999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11328125
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 0.033
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d37d499c-74cc-4fbb-9a3c-80776ebf2b82.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d37d499c-74cc-4fbb-9a3c-80776ebf2b82.json
deleted file mode 100644
index 256a6550122ab3fa717c4d0b58ee2326fc183aee..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d37d499c-74cc-4fbb-9a3c-80776ebf2b82.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1762652579.618947",
- "retrieved_timestamp": "1762652579.618948",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/100k_fineweb_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30832191917445706
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3323387445789459
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04078549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33021875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14976728723404256
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.5
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/fc62bbce-e2e4-4b41-b632-a09eb8b0a4d6.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/fc62bbce-e2e4-4b41-b632-a09eb8b0a4d6.json
deleted file mode 100644
index dd47a9c7ce586d2c390cdcefe2910a00c9af41f2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/fc62bbce-e2e4-4b41-b632-a09eb8b0a4d6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/1762652579.619225",
- "retrieved_timestamp": "1762652579.6192262",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/10k_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.509730847484674
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5214989784123593
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09743202416918428
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43095833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37691156914893614
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 16.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/157d1e12-ced4-4b48-a651-5671a2b85ee6.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/157d1e12-ced4-4b48-a651-5671a2b85ee6.json
deleted file mode 100644
index 406cf79d502f0a6056676fa4b345760b47dd2dd6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/157d1e12-ced4-4b48-a651-5671a2b85ee6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1762652579.619448",
- "retrieved_timestamp": "1762652579.6194491",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/10k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28154408081667753
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3305518729746925
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.030966767371601207
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33021875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15408909574468085
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.5
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/5450695c-a1fd-431f-9201-19d858e48867.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/5450695c-a1fd-431f-9201-19d858e48867.json
deleted file mode 100644
index 05e04bde570706a006d697e9911bb4d3266fa49e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/5450695c-a1fd-431f-9201-19d858e48867.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1762652579.619661",
- "retrieved_timestamp": "1762652579.619661",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/40k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3015775919006015
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33246082656550385
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03323262839879154
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3408229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14852061170212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.5
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d780dd37-3e71-400a-93be-f9512ad77d3e.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d780dd37-3e71-400a-93be-f9512ad77d3e.json
deleted file mode 100644
index 4824ff032d3216165d13d1beaeb22de85dc7bcfa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/d780dd37-3e71-400a-93be-f9512ad77d3e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit/1762652579.619875",
- "retrieved_timestamp": "1762652579.6198761",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/83k_continued_pretraining_Qwen2.5-0.5B-Instruct_Unsloth_merged_16bit"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28693976426991497
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33465340701604496
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.030211480362537766
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3289479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15550199468085107
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.5
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1000k_fineweb/4ba295dd-43f3-45d6-8abe-58cd6fb11eee.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1000k_fineweb/4ba295dd-43f3-45d6-8abe-58cd6fb11eee.json
deleted file mode 100644
index 09772d0cb7766d3ec9403e0da95dd07f9d29f5a9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1000k_fineweb/4ba295dd-43f3-45d6-8abe-58cd6fb11eee.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1000k_fineweb/1762652579.620099",
- "retrieved_timestamp": "1762652579.6201",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_1000k_fineweb",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14845388014911545
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2917939408206228
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.00906344410876133
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35806249999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1163563829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/7d967a13-3d40-4a9c-ac1d-956c2b2b6b98.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/7d967a13-3d40-4a9c-ac1d-956c2b2b6b98.json
deleted file mode 100644
index 9820bbe9828317f76fff3802fa4256182e4216cd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/7d967a13-3d40-4a9c-ac1d-956c2b2b6b98.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed/1762652579.620331",
- "retrieved_timestamp": "1762652579.620332",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_human_removed"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15537329840379083
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3066426145674803
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.006042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25083892617449666
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35803125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11427859042553191
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/93f69ae3-c779-4f6b-8ac9-9bd8478e7eb2.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/93f69ae3-c779-4f6b-8ac9-9bd8478e7eb2.json
deleted file mode 100644
index 80137837760b9dae3db916bc7d42447b44020c57..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/93f69ae3-c779-4f6b-8ac9-9bd8478e7eb2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1000k_fineweb_uncovai_selected/1762652579.62055",
- "retrieved_timestamp": "1762652579.6205509",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_1000k_fineweb_uncovai_selected"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14678054229444543
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29317781029884354
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.006797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4047604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11569148936170212
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1200k_fineweb/3b102085-a3f6-4da6-abdf-f906f0b37f3c.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1200k_fineweb/3b102085-a3f6-4da6-abdf-f906f0b37f3c.json
deleted file mode 100644
index 77be3598cc29b8f42b4b4d746cd641073d608842..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1200k_fineweb/3b102085-a3f6-4da6-abdf-f906f0b37f3c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1200k_fineweb/1762652579.620773",
- "retrieved_timestamp": "1762652579.620773",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_1200k_fineweb",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15809607397261488
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29409841468035297
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.006797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3713645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10762965425531915
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/c8e1bfa5-d1dc-4bcb-9b91-397302006b1d.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/c8e1bfa5-d1dc-4bcb-9b91-397302006b1d.json
deleted file mode 100644
index 8f23adca06217fc6017d55c116170176ff996f1d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/c8e1bfa5-d1dc-4bcb-9b91-397302006b1d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed/1762652579.6209762",
- "retrieved_timestamp": "1762652579.620977",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_human_removed"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.157771379938563
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29496212100634955
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0007552870090634441
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36999999999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11394614361702128
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/d4dabe47-4bc9-46fe-8c2d-206d5ed8874a.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/d4dabe47-4bc9-46fe-8c2d-206d5ed8874a.json
deleted file mode 100644
index 2afcb49f46abe1dd57403b06f50d4a90d7868193..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/d4dabe47-4bc9-46fe-8c2d-206d5ed8874a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1200k_fineweb_uncovai_selected/1762652579.6211882",
- "retrieved_timestamp": "1762652579.6211882",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_1200k_fineweb_uncovai_selected"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15847063569107744
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29604672415652145
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0075528700906344415
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3567291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11643949468085106
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1400k_fineweb/c5cb1709-7ba4-438c-8af7-d96cb4ab4ad0.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1400k_fineweb/c5cb1709-7ba4-438c-8af7-d96cb4ab4ad0.json
deleted file mode 100644
index 47ea25b72e9557fb9a8a40adb00c0681ee5e42b7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1400k_fineweb/c5cb1709-7ba4-438c-8af7-d96cb4ab4ad0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1400k_fineweb/1762652579.6213892",
- "retrieved_timestamp": "1762652579.62139",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_1400k_fineweb",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17638089158987041
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2921781950918249
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.011329305135951661
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3873333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1079621010638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/75cbe3a2-cbfa-482b-8c35-b74caf046df8.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/75cbe3a2-cbfa-482b-8c35-b74caf046df8.json
deleted file mode 100644
index 842e04193008e57fe2e5c6479d90a8a47babe2e5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/75cbe3a2-cbfa-482b-8c35-b74caf046df8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed/1762652579.621598",
- "retrieved_timestamp": "1762652579.621599",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_human_removed"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17066051410258115
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2992388897714206
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.010574018126888218
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3939375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11045545212765957
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/062fa044-0fd4-49ea-988d-f477c7930496.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/062fa044-0fd4-49ea-988d-f477c7930496.json
deleted file mode 100644
index 8cd9a2adf4e6a285bcc79c62adeac063c3e974f5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/062fa044-0fd4-49ea-988d-f477c7930496.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_1400k_fineweb_uncovai_selected/1762652579.621813",
- "retrieved_timestamp": "1762652579.621814",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_1400k_fineweb_uncovai_selected"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15384956360235286
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.291672957517483
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.010574018126888218
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37406249999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11369680851063829
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/af001f63-a060-49ec-9bd3-f06b2ad96dc8.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/af001f63-a060-49ec-9bd3-f06b2ad96dc8.json
deleted file mode 100644
index 4f70179c1aa09809cacf1f4ed9d2ed113f2fc059..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/af001f63-a060-49ec-9bd3-f06b2ad96dc8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed/1762652579.622025",
- "retrieved_timestamp": "1762652579.622026",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_human_removed"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14747979804695985
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30287372123209483
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0037764350453172208
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35784375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11195146276595745
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_selected/556e1124-135e-473f-9e62-852f095b3118.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_selected/556e1124-135e-473f-9e62-852f095b3118.json
deleted file mode 100644
index b6e838c0397d801dfeee07a4d51035720df093d4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_selected/556e1124-135e-473f-9e62-852f095b3118.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_200k_fineweb_uncovai_selected/1762652579.622248",
- "retrieved_timestamp": "1762652579.622248",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_200k_fineweb_uncovai_selected"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13451530827094332
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2927186496606003
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0075528700906344415
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25083892617449666
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36603125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11311502659574468
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_400k_fineweb/982d6727-aa6c-41fe-abe7-47811ad3c9da.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_400k_fineweb/982d6727-aa6c-41fe-abe7-47811ad3c9da.json
deleted file mode 100644
index 2cf1def1ef6cf2b2c605ea1417b99033285c4597..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_400k_fineweb/982d6727-aa6c-41fe-abe7-47811ad3c9da.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_400k_fineweb/1762652579.62247",
- "retrieved_timestamp": "1762652579.62247",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_400k_fineweb",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1511267880335288
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29723404576965046
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.012084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2525167785234899
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3794270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11627327127659574
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/7b8f532b-c3a5-48fe-9d3f-e9c8b6f6897d.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/7b8f532b-c3a5-48fe-9d3f-e9c8b6f6897d.json
deleted file mode 100644
index d17ad513fada11815807b510af6061e1a010bf4d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/7b8f532b-c3a5-48fe-9d3f-e9c8b6f6897d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed/1762652579.622689",
- "retrieved_timestamp": "1762652579.62269",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_human_removed"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.155648124753432
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3048804422828362
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.00906344410876133
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2550335570469799
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38599999999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11377992021276596
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_selected/1ce9e40f-5613-4d95-b451-a34f3feb961e.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_selected/1ce9e40f-5613-4d95-b451-a34f3feb961e.json
deleted file mode 100644
index db63b0194d5d1c02b15f7019e255e105d586260d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_selected/1ce9e40f-5613-4d95-b451-a34f3feb961e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_400k_fineweb_uncovai_selected/1762652579.62291",
- "retrieved_timestamp": "1762652579.622911",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_400k_fineweb_uncovai_selected"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15842076800666677
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2925171720555518
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.006797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25419463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38199999999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1157746010638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_600k_fineweb/bf6d3042-aa42-45b5-8bb1-49a8c5e2fd50.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_600k_fineweb/bf6d3042-aa42-45b5-8bb1-49a8c5e2fd50.json
deleted file mode 100644
index 3e1fc5d93b905c96085947364e50df49f9dd4ffc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_600k_fineweb/bf6d3042-aa42-45b5-8bb1-49a8c5e2fd50.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_600k_fineweb/1762652579.623165",
- "retrieved_timestamp": "1762652579.6231658",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_600k_fineweb",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16391618682872555
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3013718229200533
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.006042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38085416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11261635638297872
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/4446e0a4-abdc-48a4-83f7-cc3d4aeede78.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/4446e0a4-abdc-48a4-83f7-cc3d4aeede78.json
deleted file mode 100644
index e24b2382d0ae87ff1d159a009bfe1d8a01d62b13..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/4446e0a4-abdc-48a4-83f7-cc3d4aeede78.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed/1762652579.623383",
- "retrieved_timestamp": "1762652579.623384",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_human_removed"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16414114549395603
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30001678726257036
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.00906344410876133
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3793333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1146941489361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_selected/52f63809-1390-4a66-8ae2-8f150425d2d9.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_selected/52f63809-1390-4a66-8ae2-8f150425d2d9.json
deleted file mode 100644
index 21689ed6598be421af00f16fedc67a12fe17150a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_selected/52f63809-1390-4a66-8ae2-8f150425d2d9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_600k_fineweb_uncovai_selected/1762652579.623598",
- "retrieved_timestamp": "1762652579.623599",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_600k_fineweb_uncovai_selected"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16059389087620846
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2983444769655102
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0075528700906344415
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3846354166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11619015957446809
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_800k_fineweb/6b7b5025-01c0-470b-8856-b628b11f4e6c.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_800k_fineweb/6b7b5025-01c0-470b-8856-b628b11f4e6c.json
deleted file mode 100644
index 3495f58cc736e411438967aa5f53ba9e4031469a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_800k_fineweb/6b7b5025-01c0-470b-8856-b628b11f4e6c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_800k_fineweb/1762652579.623817",
- "retrieved_timestamp": "1762652579.623818",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_800k_fineweb",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16414114549395603
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29594449748780255
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.008308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24916107382550334
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.370125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11519281914893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/b85e5d55-dbdd-4383-ac86-75c83648c522.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/b85e5d55-dbdd-4383-ac86-75c83648c522.json
deleted file mode 100644
index 4d2efbda236dccea1241926f133196ab1951e2a0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/b85e5d55-dbdd-4383-ac86-75c83648c522.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed/1762652579.62404",
- "retrieved_timestamp": "1762652579.6240408",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_human_removed"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1622927166584662
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3038096660271284
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.006797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2525167785234899
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3992708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11377992021276596
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_selected/dcddcf2f-f3fe-4f45-8c42-e95b1ac99d88.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_selected/dcddcf2f-f3fe-4f45-8c42-e95b1ac99d88.json
deleted file mode 100644
index 052ad6c7aab8af7e1ea3d3f727775ec7ada6bb8a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_selected/dcddcf2f-f3fe-4f45-8c42-e95b1ac99d88.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2-135M_pretrained_800k_fineweb_uncovai_selected/1762652579.624255",
- "retrieved_timestamp": "1762652579.624256",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2-135M_pretrained_800k_fineweb_uncovai_selected"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14742993036254914
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2942808065535252
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.004531722054380665
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3766354166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11303191489361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2_pretrained_200k_fineweb/3d10ce78-6474-48c0-8eb3-c5b7146d3e06.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2_pretrained_200k_fineweb/3d10ce78-6474-48c0-8eb3-c5b7146d3e06.json
deleted file mode 100644
index f7bd1b3ffcd6327d3c6e24ee2f44a8a320154011..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_smollm2_pretrained_200k_fineweb/3d10ce78-6474-48c0-8eb3-c5b7146d3e06.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_smollm2_pretrained_200k_fineweb/1762652579.624471",
- "retrieved_timestamp": "1762652579.624471",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/smollm2_pretrained_200k_fineweb",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/smollm2_pretrained_200k_fineweb"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15270039051937748
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.299468427221449
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0037764350453172208
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24748322147651006
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3699375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11594082446808511
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/41e2bd81-2369-416a-9287-021872efd931.json b/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/41e2bd81-2369-416a-9287-021872efd931.json
deleted file mode 100644
index c07e5af67aae85dbe8e7afa640ad7c171c2d1432..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FlofloB/FlofloB_test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/41e2bd81-2369-416a-9287-021872efd931.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FlofloB_test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit/1762652579.6246889",
- "retrieved_timestamp": "1762652579.6246898",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit",
- "developer": "FlofloB",
- "inference_platform": "unknown",
- "id": "FlofloB/test_continued_pretraining_Phi-3-mini-4k-instruct_Unsloth_merged_16bit"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.521546164177715
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5240829189778252
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11027190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42441666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3720910904255319
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 16.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FuJhen/FuJhen_ft-openhermes-25-mistral-7b-irca-dpo-pairs/bfaec047-518f-42a0-93a1-c6bda3589c26.json b/leaderboard_data/HFOpenLLMv2/FuJhen/FuJhen_ft-openhermes-25-mistral-7b-irca-dpo-pairs/bfaec047-518f-42a0-93a1-c6bda3589c26.json
deleted file mode 100644
index c4c2972c9ab9de2c30c18030400c59fd3a28de76..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FuJhen/FuJhen_ft-openhermes-25-mistral-7b-irca-dpo-pairs/bfaec047-518f-42a0-93a1-c6bda3589c26.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FuJhen_ft-openhermes-25-mistral-7b-irca-dpo-pairs/1762652579.624908",
- "retrieved_timestamp": "1762652579.6249092",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs",
- "developer": "FuJhen",
- "inference_platform": "unknown",
- "id": "FuJhen/ft-openhermes-25-mistral-7b-irca-dpo-pairs"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5420041046645123
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47730323895548116
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04833836858006042
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.417375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2956283244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "?",
- "params_billions": 14.483
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FuJhen/FuJhen_mistral-instruct-7B-DPO/5f79d177-3ca8-4c95-83bb-2abb0e803e72.json b/leaderboard_data/HFOpenLLMv2/FuJhen/FuJhen_mistral-instruct-7B-DPO/5f79d177-3ca8-4c95-83bb-2abb0e803e72.json
deleted file mode 100644
index efbdfe34b29635735b4a6f5304293d5a62e166b7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FuJhen/FuJhen_mistral-instruct-7B-DPO/5f79d177-3ca8-4c95-83bb-2abb0e803e72.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FuJhen_mistral-instruct-7B-DPO/1762652579.625171",
- "retrieved_timestamp": "1762652579.625172",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FuJhen/mistral-instruct-7B-DPO",
- "developer": "FuJhen",
- "inference_platform": "unknown",
- "id": "FuJhen/mistral-instruct-7B-DPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49684171332065585
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46239050561386214
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03851963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4015625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30335771276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "?",
- "params_billions": 14.496
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-7B-v2.0/26ca0085-db25-4664-823a-f56e08081dc4.json b/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-7B-v2.0/26ca0085-db25-4664-823a-f56e08081dc4.json
deleted file mode 100644
index 4eec917d1d1f5f3e45ee3c383c0a26b9223c7b0c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-7B-v2.0/26ca0085-db25-4664-823a-f56e08081dc4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-7B-v2.0/1762652579.625878",
- "retrieved_timestamp": "1762652579.625879",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FuseAI/FuseChat-7B-v2.0",
- "developer": "FuseAI",
- "inference_platform": "unknown",
- "id": "FuseAI/FuseChat-7B-v2.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3423194900641409
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4954212795868764
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06117824773413897
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4796666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162400265957447
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-Llama-3.1-8B-Instruct/fdc9ea4d-acf8-4f2c-b727-482f464eb925.json b/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-Llama-3.1-8B-Instruct/fdc9ea4d-acf8-4f2c-b727-482f464eb925.json
deleted file mode 100644
index b13fd775cf5b39aea093df08b73c5d9e4ccf75cd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-Llama-3.1-8B-Instruct/fdc9ea4d-acf8-4f2c-b727-482f464eb925.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-Llama-3.1-8B-Instruct/1762652579.626143",
- "retrieved_timestamp": "1762652579.626144",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FuseAI/FuseChat-Llama-3.1-8B-Instruct",
- "developer": "FuseAI",
- "inference_platform": "unknown",
- "id": "FuseAI/FuseChat-Llama-3.1-8B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7204816553411615
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5119887898349903
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24773413897280966
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38200000000000006
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37333776595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-Llama-3.2-3B-Instruct/e39160a3-8332-467d-900f-52bb7d1446c1.json b/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-Llama-3.2-3B-Instruct/e39160a3-8332-467d-900f-52bb7d1446c1.json
deleted file mode 100644
index 5f3629f26440d69425f4e727aea4a560b2e5ddb1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-Llama-3.2-3B-Instruct/e39160a3-8332-467d-900f-52bb7d1446c1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-Llama-3.2-3B-Instruct/1762652579.626356",
- "retrieved_timestamp": "1762652579.626357",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FuseAI/FuseChat-Llama-3.2-3B-Instruct",
- "developer": "FuseAI",
- "inference_platform": "unknown",
- "id": "FuseAI/FuseChat-Llama-3.2-3B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.684886102208806
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46583679221755164
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24244712990936557
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39139583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31316489361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-Qwen-2.5-7B-Instruct/1bae6b5e-47b0-4fe2-847a-8aec0a36342e.json b/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-Qwen-2.5-7B-Instruct/1bae6b5e-47b0-4fe2-847a-8aec0a36342e.json
deleted file mode 100644
index 85c5c73dbb6e4fc92f67bc66025a96f6ae54bad1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/FuseAI/FuseAI_FuseChat-Qwen-2.5-7B-Instruct/1bae6b5e-47b0-4fe2-847a-8aec0a36342e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/FuseAI_FuseChat-Qwen-2.5-7B-Instruct/1762652579.626579",
- "retrieved_timestamp": "1762652579.626579",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "FuseAI/FuseChat-Qwen-2.5-7B-Instruct",
- "developer": "FuseAI",
- "inference_platform": "unknown",
- "id": "FuseAI/FuseChat-Qwen-2.5-7B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5905641475728844
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.552599883615556
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4561933534743202
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3873645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41181848404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/GalrionSoftworks/GalrionSoftworks_MN-LooseCannon-12B-v1/eb76e049-3a5d-4786-9724-800b719a6113.json b/leaderboard_data/HFOpenLLMv2/GalrionSoftworks/GalrionSoftworks_MN-LooseCannon-12B-v1/eb76e049-3a5d-4786-9724-800b719a6113.json
deleted file mode 100644
index 84e651f7a495168528f7f35b30de71e94e8ec84d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/GalrionSoftworks/GalrionSoftworks_MN-LooseCannon-12B-v1/eb76e049-3a5d-4786-9724-800b719a6113.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/GalrionSoftworks_MN-LooseCannon-12B-v1/1762652579.626794",
- "retrieved_timestamp": "1762652579.626794",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "GalrionSoftworks/MN-LooseCannon-12B-v1",
- "developer": "GalrionSoftworks",
- "inference_platform": "unknown",
- "id": "GalrionSoftworks/MN-LooseCannon-12B-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5417791459992819
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5128183808679557
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08534743202416918
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41384375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3195644946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/GalrionSoftworks/GalrionSoftworks_MagnusIntellectus-12B-v1/99a948ab-cc5b-4f3a-aae0-684cbfb6ffb3.json b/leaderboard_data/HFOpenLLMv2/GalrionSoftworks/GalrionSoftworks_MagnusIntellectus-12B-v1/99a948ab-cc5b-4f3a-aae0-684cbfb6ffb3.json
deleted file mode 100644
index a31127a1ad2b0f5360ff5869f9e5674565472a55..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/GalrionSoftworks/GalrionSoftworks_MagnusIntellectus-12B-v1/99a948ab-cc5b-4f3a-aae0-684cbfb6ffb3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/GalrionSoftworks_MagnusIntellectus-12B-v1/1762652579.62705",
- "retrieved_timestamp": "1762652579.627051",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "GalrionSoftworks/MagnusIntellectus-12B-v1",
- "developer": "GalrionSoftworks",
- "inference_platform": "unknown",
- "id": "GalrionSoftworks/MagnusIntellectus-12B-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4421368635221213
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5323010476246133
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0649546827794562
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4428020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34208776595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/GoToCompany/GoToCompany_gemma2-9b-cpt-sahabatai-v1-instruct/68ff0a5c-9e76-410b-a4e3-4b7de0e7fe35.json b/leaderboard_data/HFOpenLLMv2/GoToCompany/GoToCompany_gemma2-9b-cpt-sahabatai-v1-instruct/68ff0a5c-9e76-410b-a4e3-4b7de0e7fe35.json
deleted file mode 100644
index 562320e4ad7c8bded4a52ce3633fa45aa0a3566a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/GoToCompany/GoToCompany_gemma2-9b-cpt-sahabatai-v1-instruct/68ff0a5c-9e76-410b-a4e3-4b7de0e7fe35.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/GoToCompany_gemma2-9b-cpt-sahabatai-v1-instruct/1762652579.628178",
- "retrieved_timestamp": "1762652579.628178",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct",
- "developer": "GoToCompany",
- "inference_platform": "unknown",
- "id": "GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6550607942481504
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5954551751157878
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2054380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3347315436241611
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4778645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4263630319148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/GoToCompany/GoToCompany_llama3-8b-cpt-sahabatai-v1-instruct/aa363693-a300-4545-b7f3-05492646c202.json b/leaderboard_data/HFOpenLLMv2/GoToCompany/GoToCompany_llama3-8b-cpt-sahabatai-v1-instruct/aa363693-a300-4545-b7f3-05492646c202.json
deleted file mode 100644
index 3d9a9493e2b219bbd8a54ce589a4276beb646053..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/GoToCompany/GoToCompany_llama3-8b-cpt-sahabatai-v1-instruct/aa363693-a300-4545-b7f3-05492646c202.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/GoToCompany_llama3-8b-cpt-sahabatai-v1-instruct/1762652579.628486",
- "retrieved_timestamp": "1762652579.628489",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct",
- "developer": "GoToCompany",
- "inference_platform": "unknown",
- "id": "GoToCompany/llama3-8b-cpt-sahabatai-v1-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.523844510343666
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4951292004509417
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12764350453172205
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44884375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3453291223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1b9a4b84-1766-49ca-bd11-17a2340b9736.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1b9a4b84-1766-49ca-bd11-17a2340b9736.json
deleted file mode 100644
index 9bc0ceeacad943232fae7d894d851ef3c7551917..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1b9a4b84-1766-49ca-bd11-17a2340b9736.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1762652579.6293938",
- "retrieved_timestamp": "1762652579.629396",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1",
- "developer": "Goekdeniz-Guelmez",
- "inference_platform": "unknown",
- "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3416944817528602
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32921013057720044
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.002265861027190332
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3249166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16381316489361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/235adbd2-8128-4428-af57-8d8e310ba56f.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/235adbd2-8128-4428-af57-8d8e310ba56f.json
deleted file mode 100644
index 055a032a4dc2b25168db9406e574d2478162cd00..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/235adbd2-8128-4428-af57-8d8e310ba56f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1/1762652579.629041",
- "retrieved_timestamp": "1762652579.629042",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1",
- "developer": "Goekdeniz-Guelmez",
- "inference_platform": "unknown",
- "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-0.5B-Instruct-abliterated-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.347189900574919
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32683063456958195
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2516778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16414561170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/a82acc9c-4093-4e0d-a862-7d6eb3cb7146.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/a82acc9c-4093-4e0d-a862-7d6eb3cb7146.json
deleted file mode 100644
index 7615c63553a3c8143ead5891e01816d48e26fc81..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/a82acc9c-4093-4e0d-a862-7d6eb3cb7146.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1/1762652579.629639",
- "retrieved_timestamp": "1762652579.6296399",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1",
- "developer": "Goekdeniz-Guelmez",
- "inference_platform": "unknown",
- "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47685806992114255
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.418600731531926
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24328859060402686
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3674895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27825797872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/baae7cee-8b76-456f-96dc-5ac900a9a36e.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/baae7cee-8b76-456f-96dc-5ac900a9a36e.json
deleted file mode 100644
index 57edbee283ea99378232e38e56948f84a04bd94f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/baae7cee-8b76-456f-96dc-5ac900a9a36e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2/1762652579.629877",
- "retrieved_timestamp": "1762652579.629878",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2",
- "developer": "Goekdeniz-Guelmez",
- "inference_platform": "unknown",
- "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.421553699738915
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40418921704436744
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1268882175226586
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23993288590604026
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37685416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25615026595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/9363a90d-6ec7-4de2-af17-a3e3e25de7d9.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/9363a90d-6ec7-4de2-af17-a3e3e25de7d9.json
deleted file mode 100644
index b1f8629d0e10d31e0d383e0b9940f445ae91d80d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/9363a90d-6ec7-4de2-af17-a3e3e25de7d9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3/1762652579.630181",
- "retrieved_timestamp": "1762652579.6301818",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3",
- "developer": "Goekdeniz-Guelmez",
- "inference_platform": "unknown",
- "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-1.5B-Instruct-abliterated-v3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42525055740989465
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4053446177133173
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13066465256797583
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24328859060402686
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37018749999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25556848404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/af440c67-78de-4053-98d8-8cded9657860.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/af440c67-78de-4053-98d8-8cded9657860.json
deleted file mode 100644
index 1d5b274dfddd02762faacdcd25a011ceb251ddb9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/af440c67-78de-4053-98d8-8cded9657860.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-14B-Instruct-abliterated-v4/1762652579.6304152",
- "retrieved_timestamp": "1762652579.630416",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4",
- "developer": "Goekdeniz-Guelmez",
- "inference_platform": "unknown",
- "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-14B-Instruct-abliterated-v4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8291666112581284
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6355637424320617
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5422960725075529
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3422818791946309
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4286666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5018284574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/9c443687-99df-4cd9-8e19-d40cd83b30bc.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/9c443687-99df-4cd9-8e19-d40cd83b30bc.json
deleted file mode 100644
index a19e25a032081d591df4268a94210ed485a2ecaf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/9c443687-99df-4cd9-8e19-d40cd83b30bc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/1762652579.630644",
- "retrieved_timestamp": "1762652579.630645",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2",
- "developer": "Goekdeniz-Guelmez",
- "inference_platform": "unknown",
- "id": "Goekdeniz-Guelmez/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7813811797142693
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5309672164610734
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45317220543806647
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43539583333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4119847074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/b6bf7c36-006c-4256-a315-1de70e2540c3.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/b6bf7c36-006c-4256-a315-1de70e2540c3.json
deleted file mode 100644
index 2714c553c725a848c14c6856838e4381b78fcb33..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/b6bf7c36-006c-4256-a315-1de70e2540c3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_j.o.s.i.e.v4o-1.5b-dpo-stage1-v1/1762652579.631213",
- "retrieved_timestamp": "1762652579.631215",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1",
- "developer": "Goekdeniz-Guelmez",
- "inference_platform": "unknown",
- "id": "Goekdeniz-Guelmez/j.o.s.i.e.v4o-1.5b-dpo-stage1-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41883092417009093
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41242101633634826
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12009063444108761
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25083892617449666
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3528541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2554853723404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-3b-v6.0/89947a58-5e39-468e-bbbc-2f3556a1c8f1.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-3b-v6.0/89947a58-5e39-468e-bbbc-2f3556a1c8f1.json
deleted file mode 100644
index 3546b84c26ad4a8ffda166bcc266e760b6ffae15..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-3b-v6.0/89947a58-5e39-468e-bbbc-2f3556a1c8f1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-3b-v6.0/1762652579.631514",
- "retrieved_timestamp": "1762652579.6315148",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Goekdeniz-Guelmez/josie-3b-v6.0",
- "developer": "Goekdeniz-Guelmez",
- "inference_platform": "unknown",
- "id": "Goekdeniz-Guelmez/josie-3b-v6.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6009554648333089
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4496147842264783
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2938066465256798
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.386125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32197473404255317
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/7c2cc003-fab3-4fc9-a6b6-fb7075261e50.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/7c2cc003-fab3-4fc9-a6b6-fb7075261e50.json
deleted file mode 100644
index 5c185a0a0df65161af538c76ce7b2ad37cada907..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/7c2cc003-fab3-4fc9-a6b6-fb7075261e50.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/1762652579.6322381",
- "retrieved_timestamp": "1762652579.632239",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000",
- "developer": "Goekdeniz-Guelmez",
- "inference_platform": "unknown",
- "id": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7597740661444966
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.510712680636641
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42371601208459214
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27684563758389263
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45393750000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4011801861702128
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/90d4e4e1-2185-4d21-8730-f1a4bf413157.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/90d4e4e1-2185-4d21-8730-f1a4bf413157.json
deleted file mode 100644
index 4d912cb62ac0631fd05c78a282278b8e8fffabf5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/90d4e4e1-2185-4d21-8730-f1a4bf413157.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-7b-v6.0-step2000/1762652579.632",
- "retrieved_timestamp": "1762652579.632001",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000",
- "developer": "Goekdeniz-Guelmez",
- "inference_platform": "unknown",
- "id": "Goekdeniz-Guelmez/josie-7b-v6.0-step2000"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7627716680629618
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5097811950503962
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45793750000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40325797872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-7b-v6.0/aa158f5d-94a5-4f40-8a65-87fe9605abc1.json b/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-7b-v6.0/aa158f5d-94a5-4f40-8a65-87fe9605abc1.json
deleted file mode 100644
index e7aa100ccd96013ee990d8f1af8485242713abce..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Goekdeniz-Guelmez/Goekdeniz-Guelmez_josie-7b-v6.0/aa158f5d-94a5-4f40-8a65-87fe9605abc1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Goekdeniz-Guelmez_josie-7b-v6.0/1762652579.631763",
- "retrieved_timestamp": "1762652579.631764",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Goekdeniz-Guelmez/josie-7b-v6.0",
- "developer": "Goekdeniz-Guelmez",
- "inference_platform": "unknown",
- "id": "Goekdeniz-Guelmez/josie-7b-v6.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7411645544931892
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5104855208094123
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43580060422960726
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41539583333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3806515957446808
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/GreenNode/GreenNode_GreenNode-small-9B-it/d13def83-5ff8-4cde-aef5-b3c268c40c16.json b/leaderboard_data/HFOpenLLMv2/GreenNode/GreenNode_GreenNode-small-9B-it/d13def83-5ff8-4cde-aef5-b3c268c40c16.json
deleted file mode 100644
index 61f9284c1007b8df21f6c1e4ad1395ec95bf83e7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/GreenNode/GreenNode_GreenNode-small-9B-it/d13def83-5ff8-4cde-aef5-b3c268c40c16.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/GreenNode_GreenNode-small-9B-it/1762652579.6324449",
- "retrieved_timestamp": "1762652579.632446",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "GreenNode/GreenNode-small-9B-it",
- "developer": "GreenNode",
- "inference_platform": "unknown",
- "id": "GreenNode/GreenNode-small-9B-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7436125037123721
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.599383874005197
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17447129909365558
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42041666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3927027925531915
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/GritLM/GritLM_GritLM-7B-KTO/6d7f26d7-2336-4def-9d17-09d30a89e02d.json b/leaderboard_data/HFOpenLLMv2/GritLM/GritLM_GritLM-7B-KTO/6d7f26d7-2336-4def-9d17-09d30a89e02d.json
deleted file mode 100644
index e9bc8277c182871d42e52831dbb00d096233b4d3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/GritLM/GritLM_GritLM-7B-KTO/6d7f26d7-2336-4def-9d17-09d30a89e02d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/GritLM_GritLM-7B-KTO/1762652579.632807",
- "retrieved_timestamp": "1762652579.632808",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "GritLM/GritLM-7B-KTO",
- "developer": "GritLM",
- "inference_platform": "unknown",
- "id": "GritLM/GritLM-7B-KTO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5310132670203948
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.485293719684692
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.027190332326283987
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37102083333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26803523936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/GritLM/GritLM_GritLM-8x7B-KTO/de98eb82-0606-46b8-bbfb-d054a0f6ef2c.json b/leaderboard_data/HFOpenLLMv2/GritLM/GritLM_GritLM-8x7B-KTO/de98eb82-0606-46b8-bbfb-d054a0f6ef2c.json
deleted file mode 100644
index fb1384089ea721c04d894bd48cdd83b3310f7d43..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/GritLM/GritLM_GritLM-8x7B-KTO/de98eb82-0606-46b8-bbfb-d054a0f6ef2c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/GritLM_GritLM-8x7B-KTO/1762652579.633089",
- "retrieved_timestamp": "1762652579.633089",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "GritLM/GritLM-8x7B-KTO",
- "developer": "GritLM",
- "inference_platform": "unknown",
- "id": "GritLM/GritLM-8x7B-KTO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5714049832222946
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5820304362331497
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12235649546827794
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42165625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36477726063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 46.703
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-1.5-12b-Nemo/f9ed0b0f-6fa9-4450-97fe-204f6dc8d88a.json b/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-1.5-12b-Nemo/f9ed0b0f-6fa9-4450-97fe-204f6dc8d88a.json
deleted file mode 100644
index 89ae80fb263a4200a718de2cc27b6b7e381e4f66..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-1.5-12b-Nemo/f9ed0b0f-6fa9-4450-97fe-204f6dc8d88a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.5-12b-Nemo/1762652579.633812",
- "retrieved_timestamp": "1762652579.633813",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Gryphe/Pantheon-RP-1.5-12b-Nemo",
- "developer": "Gryphe",
- "inference_platform": "unknown",
- "id": "Gryphe/Pantheon-RP-1.5-12b-Nemo"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47630841722186024
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.519582216884963
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04909365558912387
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44203125000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3302027925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-1.6-12b-Nemo-KTO/a2445d2d-b8a2-44e4-9c74-7401e7afde75.json b/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-1.6-12b-Nemo-KTO/a2445d2d-b8a2-44e4-9c74-7401e7afde75.json
deleted file mode 100644
index 1b71efd901cda7f0acfa82a62819b1501c7e23d2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-1.6-12b-Nemo-KTO/a2445d2d-b8a2-44e4-9c74-7401e7afde75.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.6-12b-Nemo-KTO/1762652579.634284",
- "retrieved_timestamp": "1762652579.634285",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO",
- "developer": "Gryphe",
- "inference_platform": "unknown",
- "id": "Gryphe/Pantheon-RP-1.6-12b-Nemo-KTO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4636187537954849
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5276980814125921
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.052870090634441085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4247916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33818151595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-1.6-12b-Nemo/9a2ca2e5-a2e9-460f-b4dc-a6293ca13003.json b/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-1.6-12b-Nemo/9a2ca2e5-a2e9-460f-b4dc-a6293ca13003.json
deleted file mode 100644
index bd1320b2b1bad1173c137e8e3de7d8aeae58c605..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-1.6-12b-Nemo/9a2ca2e5-a2e9-460f-b4dc-a6293ca13003.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-1.6-12b-Nemo/1762652579.634059",
- "retrieved_timestamp": "1762652579.6340601",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Gryphe/Pantheon-RP-1.6-12b-Nemo",
- "developer": "Gryphe",
- "inference_platform": "unknown",
- "id": "Gryphe/Pantheon-RP-1.6-12b-Nemo"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44805671174705336
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5204007434392454
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04607250755287009
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4287604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33111702127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-Pure-1.6.2-22b-Small/f5f73aa0-2223-49c0-a2ad-df38ee33355b.json b/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-Pure-1.6.2-22b-Small/f5f73aa0-2223-49c0-a2ad-df38ee33355b.json
deleted file mode 100644
index 8d4e7e18b7e323bc61c7ea597db1f9d33b143707..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Gryphe/Gryphe_Pantheon-RP-Pure-1.6.2-22b-Small/f5f73aa0-2223-49c0-a2ad-df38ee33355b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Gryphe_Pantheon-RP-Pure-1.6.2-22b-Small/1762652579.6344929",
- "retrieved_timestamp": "1762652579.6344929",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small",
- "developer": "Gryphe",
- "inference_platform": "unknown",
- "id": "Gryphe/Pantheon-RP-Pure-1.6.2-22b-Small"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6931042965996888
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5304537230538597
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20241691842900303
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3288590604026846
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37647916666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39419880319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 22.247
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/GuilhermeNaturaUmana/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/5aa1bdc6-4b8f-411f-9150-41217a94ec5e.json b/leaderboard_data/HFOpenLLMv2/GuilhermeNaturaUmana/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/5aa1bdc6-4b8f-411f-9150-41217a94ec5e.json
deleted file mode 100644
index e0c5e6e207085b136a481845472bc38ce81d63c6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/GuilhermeNaturaUmana/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/5aa1bdc6-4b8f-411f-9150-41217a94ec5e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/1762652579.63471",
- "retrieved_timestamp": "1762652579.634711",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall",
- "developer": "GuilhermeNaturaUmana",
- "inference_platform": "unknown",
- "id": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4985405391029136
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5644838945274894
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25755287009063443
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43728125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44290226063829785
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/GuilhermeNaturaUmana/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/9ddf874c-16a9-4f66-a3c5-140f10bc4787.json b/leaderboard_data/HFOpenLLMv2/GuilhermeNaturaUmana/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/9ddf874c-16a9-4f66-a3c5-140f10bc4787.json
deleted file mode 100644
index 933bf263cce590fd9d86b8c1862b75bec34e3ea9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/GuilhermeNaturaUmana/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/9ddf874c-16a9-4f66-a3c5-140f10bc4787.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/GuilhermeNaturaUmana_Nature-Reason-1.2-reallysmall/1762652579.634963",
- "retrieved_timestamp": "1762652579.634964",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall",
- "developer": "GuilhermeNaturaUmana",
- "inference_platform": "unknown",
- "id": "GuilhermeNaturaUmana/Nature-Reason-1.2-reallysmall"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47910654840268263
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5648715950622487
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4439166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4408244680851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HarbingerX/HarbingerX_Zeitgeist-3b-V1.2/37dad0cc-36d1-4a4c-8d9c-0f5246889a0c.json b/leaderboard_data/HFOpenLLMv2/HarbingerX/HarbingerX_Zeitgeist-3b-V1.2/37dad0cc-36d1-4a4c-8d9c-0f5246889a0c.json
deleted file mode 100644
index c13f5a5cb0c0e0c78c59efba4334427ae203af6e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HarbingerX/HarbingerX_Zeitgeist-3b-V1.2/37dad0cc-36d1-4a4c-8d9c-0f5246889a0c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HarbingerX_Zeitgeist-3b-V1.2/1762652579.6374269",
- "retrieved_timestamp": "1762652579.637428",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HarbingerX/Zeitgeist-3b-V1.2",
- "developer": "HarbingerX",
- "inference_platform": "unknown",
- "id": "HarbingerX/Zeitgeist-3b-V1.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6754189993661264
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4440650477102142
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10120845921450151
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35790625000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30560172872340424
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HarbingerX/HarbingerX_Zeitgeist-3b-V1/3bc34460-661d-404b-bb1c-5b2fe395b897.json b/leaderboard_data/HFOpenLLMv2/HarbingerX/HarbingerX_Zeitgeist-3b-V1/3bc34460-661d-404b-bb1c-5b2fe395b897.json
deleted file mode 100644
index 7eaa7ffd8d39cccef6a9de835e078deb6a3bf97c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HarbingerX/HarbingerX_Zeitgeist-3b-V1/3bc34460-661d-404b-bb1c-5b2fe395b897.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HarbingerX_Zeitgeist-3b-V1/1762652579.637166",
- "retrieved_timestamp": "1762652579.6371672",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HarbingerX/Zeitgeist-3b-V1",
- "developer": "HarbingerX",
- "inference_platform": "unknown",
- "id": "HarbingerX/Zeitgeist-3b-V1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6711724889958643
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4440790761237121
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10347432024169184
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28187919463087246
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3579375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3009474734042553
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Hastagaras/Hastagaras_L3.2-JametMini-3B-MK.III/cf208ef7-8a9b-4633-8161-dae0825c380e.json b/leaderboard_data/HFOpenLLMv2/Hastagaras/Hastagaras_L3.2-JametMini-3B-MK.III/cf208ef7-8a9b-4633-8161-dae0825c380e.json
deleted file mode 100644
index eaaad28c8a8616666ba5b09d368ad9f929756c27..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Hastagaras/Hastagaras_L3.2-JametMini-3B-MK.III/cf208ef7-8a9b-4633-8161-dae0825c380e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Hastagaras_L3.2-JametMini-3B-MK.III/1762652579.6376362",
- "retrieved_timestamp": "1762652579.6376371",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Hastagaras/L3.2-JametMini-3B-MK.III",
- "developer": "Hastagaras",
- "inference_platform": "unknown",
- "id": "Hastagaras/L3.2-JametMini-3B-MK.III"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6182662003484088
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45385245294894094
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14577039274924472
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3686041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2982878989361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Cipher-20B/21f72176-cf3b-43ae-aa6e-51d9fe5a6e90.json b/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Cipher-20B/21f72176-cf3b-43ae-aa6e-51d9fe5a6e90.json
deleted file mode 100644
index 7695ed0f03390b91dfcdfdc6f800c26b9f336bd2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Cipher-20B/21f72176-cf3b-43ae-aa6e-51d9fe5a6e90.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HelpingAI_Cipher-20B/1762652579.638349",
- "retrieved_timestamp": "1762652579.63835",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HelpingAI/Cipher-20B",
- "developer": "HelpingAI",
- "inference_platform": "unknown",
- "id": "HelpingAI/Cipher-20B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5377575942942504
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6032432743536918
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19939577039274925
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40029166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3744182180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 20.551
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Dhanishtha-Large/e097ccca-ab91-4f16-bbfa-ca97c91fdb77.json b/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Dhanishtha-Large/e097ccca-ab91-4f16-bbfa-ca97c91fdb77.json
deleted file mode 100644
index af7d5be7d050742757864777f2920fa084d2601e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Dhanishtha-Large/e097ccca-ab91-4f16-bbfa-ca97c91fdb77.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HelpingAI_Dhanishtha-Large/1762652579.638597",
- "retrieved_timestamp": "1762652579.638598",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HelpingAI/Dhanishtha-Large",
- "developer": "HelpingAI",
- "inference_platform": "unknown",
- "id": "HelpingAI/Dhanishtha-Large"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24567370133468086
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46036539145861094
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3851963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38451041666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2755152925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Priya-10B/94aca944-b0a9-46ec-bdab-53bb5cbe3b78.json b/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Priya-10B/94aca944-b0a9-46ec-bdab-53bb5cbe3b78.json
deleted file mode 100644
index a2bdd995193b81175dd6eb7e674f056663ba1579..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Priya-10B/94aca944-b0a9-46ec-bdab-53bb5cbe3b78.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HelpingAI_Priya-10B/1762652579.638817",
- "retrieved_timestamp": "1762652579.638818",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HelpingAI/Priya-10B",
- "developer": "HelpingAI",
- "inference_platform": "unknown",
- "id": "HelpingAI/Priya-10B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40429283190822574
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4441457310476767
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0188821752265861
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2558724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3792708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24925199468085107
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.211
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Priya-3B/f709afd7-3220-41b0-909a-74d9086c7dd9.json b/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Priya-3B/f709afd7-3220-41b0-909a-74d9086c7dd9.json
deleted file mode 100644
index 491e056ff9e4ad4e3db158404e5224b1f62419a0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HelpingAI/HelpingAI_Priya-3B/f709afd7-3220-41b0-909a-74d9086c7dd9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HelpingAI_Priya-3B/1762652579.639023",
- "retrieved_timestamp": "1762652579.639024",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HelpingAI/Priya-3B",
- "developer": "HelpingAI",
- "inference_platform": "unknown",
- "id": "HelpingAI/Priya-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4525780484669566
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3961184863327844
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.014350453172205438
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25671140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3713020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23387632978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 2.81
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceH4/HuggingFaceH4_zephyr-7b-alpha/2029aa96-40b2-4af8-a7fa-8ae968b20502.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceH4/HuggingFaceH4_zephyr-7b-alpha/2029aa96-40b2-4af8-a7fa-8ae968b20502.json
deleted file mode 100644
index 58383a59a037d9f37c4e9cdda9d3f39ccffbb99b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HuggingFaceH4/HuggingFaceH4_zephyr-7b-alpha/2029aa96-40b2-4af8-a7fa-8ae968b20502.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-7b-alpha/1762652579.640769",
- "retrieved_timestamp": "1762652579.64077",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HuggingFaceH4/zephyr-7b-alpha",
- "developer": "HuggingFaceH4",
- "inference_platform": "unknown",
- "id": "HuggingFaceH4/zephyr-7b-alpha"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5191480826429429
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45828635059044115
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.019637462235649546
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3949583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2795046542553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceH4/HuggingFaceH4_zephyr-7b-beta/3b9d5166-4144-4222-a39d-3d1d3956a6e8.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceH4/HuggingFaceH4_zephyr-7b-beta/3b9d5166-4144-4222-a39d-3d1d3956a6e8.json
deleted file mode 100644
index 737d808d10d243c3e699bdc382a4ba8ddecd2f44..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HuggingFaceH4/HuggingFaceH4_zephyr-7b-beta/3b9d5166-4144-4222-a39d-3d1d3956a6e8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-7b-beta/1762652579.641025",
- "retrieved_timestamp": "1762652579.641026",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HuggingFaceH4/zephyr-7b-beta",
- "developer": "HuggingFaceH4",
- "inference_platform": "unknown",
- "id": "HuggingFaceH4/zephyr-7b-beta"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49504315216957673
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.431582191918003
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.028700906344410877
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3925416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2780917553191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceH4/HuggingFaceH4_zephyr-orpo-141b-A35b-v0.1/8b347bb4-9f6d-4c82-bd5d-2fb5f7c8f881.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceH4/HuggingFaceH4_zephyr-orpo-141b-A35b-v0.1/8b347bb4-9f6d-4c82-bd5d-2fb5f7c8f881.json
deleted file mode 100644
index f2336f38bfd558226cbbd151ba5e1be134eb0ea3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HuggingFaceH4/HuggingFaceH4_zephyr-orpo-141b-A35b-v0.1/8b347bb4-9f6d-4c82-bd5d-2fb5f7c8f881.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-orpo-141b-A35b-v0.1/1762652579.641484",
- "retrieved_timestamp": "1762652579.641485",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
- "developer": "HuggingFaceH4",
- "inference_platform": "unknown",
- "id": "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6510891102275296
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6290439728524093
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20468277945619334
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3783557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4465208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4586103723404255
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 140.621
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-1.7B-Instruct/690a5844-000e-4949-bbf9-8bd1ff2cb1bd.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-1.7B-Instruct/690a5844-000e-4949-bbf9-8bd1ff2cb1bd.json
deleted file mode 100644
index d491265c88174a5e752c5e5a4b11d5b4c7aaf322..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-1.7B-Instruct/690a5844-000e-4949-bbf9-8bd1ff2cb1bd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-1.7B-Instruct/1762652579.641991",
- "retrieved_timestamp": "1762652579.641991",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HuggingFaceTB/SmolLM-1.7B-Instruct",
- "developer": "HuggingFaceTB",
- "inference_platform": "unknown",
- "id": "HuggingFaceTB/SmolLM-1.7B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23478259905938464
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28851114363217695
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.021148036253776436
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3486666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11660571808510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.71
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-1.7B/e1b7c18a-bff1-44a3-b589-95bcb0f88e36.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-1.7B/e1b7c18a-bff1-44a3-b589-95bcb0f88e36.json
deleted file mode 100644
index 8a24a576a83065e42cca88d230a44a8b4b50a7c4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-1.7B/e1b7c18a-bff1-44a3-b589-95bcb0f88e36.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-1.7B/1762652579.6417458",
- "retrieved_timestamp": "1762652579.6417458",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HuggingFaceTB/SmolLM-1.7B",
- "developer": "HuggingFaceTB",
- "inference_platform": "unknown",
- "id": "HuggingFaceTB/SmolLM-1.7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23615673080759053
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3180516538964782
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01661631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24161073825503357
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34209375000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11477726063829788
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.71
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-135M-Instruct/adff7af4-9bae-420a-9751-9f68ab81bf99.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-135M-Instruct/adff7af4-9bae-420a-9751-9f68ab81bf99.json
deleted file mode 100644
index a3a8be93fa0185a2cf3a4f9fd6bcd29d7bd8b7c9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-135M-Instruct/adff7af4-9bae-420a-9751-9f68ab81bf99.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-135M-Instruct/1762652579.642397",
- "retrieved_timestamp": "1762652579.6423979",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HuggingFaceTB/SmolLM-135M-Instruct",
- "developer": "HuggingFaceTB",
- "inference_platform": "unknown",
- "id": "HuggingFaceTB/SmolLM-135M-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12140121544169469
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30150816789978757
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.005287009063444109
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36345833333333327
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11760305851063829
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-135M/8cd60e42-3429-4938-b43e-9c951a57ca9f.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-135M/8cd60e42-3429-4938-b43e-9c951a57ca9f.json
deleted file mode 100644
index ef56ece1f0b881b3173582c7988d8fa47f0dcb87..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-135M/8cd60e42-3429-4938-b43e-9c951a57ca9f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-135M/1762652579.642195",
- "retrieved_timestamp": "1762652579.642196",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HuggingFaceTB/SmolLM-135M",
- "developer": "HuggingFaceTB",
- "inference_platform": "unknown",
- "id": "HuggingFaceTB/SmolLM-135M"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21247622973709757
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3046054260062988
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.013595166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4366041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11220079787234043
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.13
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-360M-Instruct/ec13c105-c846-4420-91af-d42e98b7a818.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-360M-Instruct/ec13c105-c846-4420-91af-d42e98b7a818.json
deleted file mode 100644
index 5a94fe9cf3b1f20c90732bf4130789bcfb46a12d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-360M-Instruct/ec13c105-c846-4420-91af-d42e98b7a818.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-360M-Instruct/1762652579.642821",
- "retrieved_timestamp": "1762652579.642821",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HuggingFaceTB/SmolLM-360M-Instruct",
- "developer": "HuggingFaceTB",
- "inference_platform": "unknown",
- "id": "HuggingFaceTB/SmolLM-360M-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19516549422199764
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28851114363217695
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01812688821752266
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34717708333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11660571808510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.362
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-360M/236f7bdd-be50-4287-82b7-6efddc9dd3f4.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-360M/236f7bdd-be50-4287-82b7-6efddc9dd3f4.json
deleted file mode 100644
index d2462466b27565b2fe3fc8df8ac07c4cc8bf0fae..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM-360M/236f7bdd-be50-4287-82b7-6efddc9dd3f4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM-360M/1762652579.642613",
- "retrieved_timestamp": "1762652579.6426141",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HuggingFaceTB/SmolLM-360M",
- "developer": "HuggingFaceTB",
- "inference_platform": "unknown",
- "id": "HuggingFaceTB/SmolLM-360M"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2133505764704318
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30645160333152527
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.011329305135951661
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40178125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11236702127659574
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.36
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-1.7B-Instruct/09b81183-8ff2-44d5-a515-63cddc3e55c6.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-1.7B-Instruct/09b81183-8ff2-44d5-a515-63cddc3e55c6.json
deleted file mode 100644
index 8aea073920e7e6b357310ae4b934dfe573ffdc74..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-1.7B-Instruct/09b81183-8ff2-44d5-a515-63cddc3e55c6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-1.7B-Instruct/1762652579.643299",
- "retrieved_timestamp": "1762652579.6433",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
- "developer": "HuggingFaceTB",
- "inference_platform": "unknown",
- "id": "HuggingFaceTB/SmolLM2-1.7B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5367835121920947
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3598617531415158
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0581570996978852
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.342125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2053690159574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.711
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-1.7B/db57503c-bfe7-4691-983e-68af941e8b1e.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-1.7B/db57503c-bfe7-4691-983e-68af941e8b1e.json
deleted file mode 100644
index 6e79543b3c7a7d1fd9a537fa05d433b7ad92fe66..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-1.7B/db57503c-bfe7-4691-983e-68af941e8b1e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-1.7B/1762652579.6430368",
- "retrieved_timestamp": "1762652579.643038",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HuggingFaceTB/SmolLM2-1.7B",
- "developer": "HuggingFaceTB",
- "inference_platform": "unknown",
- "id": "HuggingFaceTB/SmolLM2-1.7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2440003634800108
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3452594377166261
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.026435045317220542
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3485416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2137632978723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.71
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-135M-Instruct/9a9fb17d-49ae-4a82-95c8-c8b55923d72f.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-135M-Instruct/9a9fb17d-49ae-4a82-95c8-c8b55923d72f.json
deleted file mode 100644
index b84ca446f0b7bff66ca275844f9caed9a5c8ae42..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-135M-Instruct/9a9fb17d-49ae-4a82-95c8-c8b55923d72f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-135M-Instruct/1762652579.644038",
- "retrieved_timestamp": "1762652579.644039",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HuggingFaceTB/SmolLM2-135M-Instruct",
- "developer": "HuggingFaceTB",
- "inference_platform": "unknown",
- "id": "HuggingFaceTB/SmolLM2-135M-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05925167444602544
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31347502947335903
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.014350453172205438
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23406040268456377
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3871458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10920877659574468
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-135M-Instruct/df60b16b-184c-43d9-ac79-8627f09d265b.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-135M-Instruct/df60b16b-184c-43d9-ac79-8627f09d265b.json
deleted file mode 100644
index 3f8103f38b4f00932486995ff22094ac6624f122..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-135M-Instruct/df60b16b-184c-43d9-ac79-8627f09d265b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-135M-Instruct/1762652579.643796",
- "retrieved_timestamp": "1762652579.643796",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HuggingFaceTB/SmolLM2-135M-Instruct",
- "developer": "HuggingFaceTB",
- "inference_platform": "unknown",
- "id": "HuggingFaceTB/SmolLM2-135M-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2883138960181208
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3124321328066677
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0030211480362537764
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23573825503355705
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36621875000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11145279255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-135M/1761caca-524f-4d59-81dd-631e3e24e0e5.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-135M/1761caca-524f-4d59-81dd-631e3e24e0e5.json
deleted file mode 100644
index 4b67a7664281db0efc7c1fa05577b36ac6c456a7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-135M/1761caca-524f-4d59-81dd-631e3e24e0e5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-135M/1762652579.643546",
- "retrieved_timestamp": "1762652579.6435468",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HuggingFaceTB/SmolLM2-135M",
- "developer": "HuggingFaceTB",
- "inference_platform": "unknown",
- "id": "HuggingFaceTB/SmolLM2-135M"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18177657504310785
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3044234246877141
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.012084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2483221476510067
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4111770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10945811170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-360M-Instruct/06409b6c-9d26-4bee-af75-16e6edb87a93.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-360M-Instruct/06409b6c-9d26-4bee-af75-16e6edb87a93.json
deleted file mode 100644
index b491ae17e470f57dda6a27bddd2b8094bb0f195e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-360M-Instruct/06409b6c-9d26-4bee-af75-16e6edb87a93.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-360M-Instruct/1762652579.644474",
- "retrieved_timestamp": "1762652579.644475",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HuggingFaceTB/SmolLM2-360M-Instruct",
- "developer": "HuggingFaceTB",
- "inference_platform": "unknown",
- "id": "HuggingFaceTB/SmolLM2-360M-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08303191088533979
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3052703401844317
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.008308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34228125000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11261635638297872
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.362
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-360M-Instruct/09ba6e80-5ab4-4c8c-b7ad-c1497413c207.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-360M-Instruct/09ba6e80-5ab4-4c8c-b7ad-c1497413c207.json
deleted file mode 100644
index 8390cf0fe8a3351f3ffa543fea1a30cb96e66c17..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-360M-Instruct/09ba6e80-5ab4-4c8c-b7ad-c1497413c207.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-360M-Instruct/1762652579.6446972",
- "retrieved_timestamp": "1762652579.6446981",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HuggingFaceTB/SmolLM2-360M-Instruct",
- "developer": "HuggingFaceTB",
- "inference_platform": "unknown",
- "id": "HuggingFaceTB/SmolLM2-360M-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38415958545548035
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31435050538888504
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015105740181268883
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2550335570469799
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.346125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11170212765957446
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.36
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-360M/7751b65d-2bba-465c-9a1e-5ae51d94fcf6.json b/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-360M/7751b65d-2bba-465c-9a1e-5ae51d94fcf6.json
deleted file mode 100644
index d951d5a809cf2cc4140b2a6e4b4012a2442f0f86..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HuggingFaceTB/HuggingFaceTB_SmolLM2-360M/7751b65d-2bba-465c-9a1e-5ae51d94fcf6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HuggingFaceTB_SmolLM2-360M/1762652579.6442492",
- "retrieved_timestamp": "1762652579.6442502",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HuggingFaceTB/SmolLM2-360M",
- "developer": "HuggingFaceTB",
- "inference_platform": "unknown",
- "id": "HuggingFaceTB/SmolLM2-360M"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21145227995053123
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3233478044302361
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.012084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24580536912751677
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3954270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11693816489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.36
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HumanLLMs/HumanLLMs_Humanish-LLama3-8B-Instruct/e69e4e90-8177-44f5-8497-0a45ca9155ea.json b/leaderboard_data/HFOpenLLMv2/HumanLLMs/HumanLLMs_Humanish-LLama3-8B-Instruct/e69e4e90-8177-44f5-8497-0a45ca9155ea.json
deleted file mode 100644
index 4b5a70e0b19dd07b3025ab7aeaf2f838dc33e08c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HumanLLMs/HumanLLMs_Humanish-LLama3-8B-Instruct/e69e4e90-8177-44f5-8497-0a45ca9155ea.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HumanLLMs_Humanish-LLama3-8B-Instruct/1762652579.6448839",
- "retrieved_timestamp": "1762652579.644885",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HumanLLMs/Humanish-LLama3-8B-Instruct",
- "developer": "HumanLLMs",
- "inference_platform": "unknown",
- "id": "HumanLLMs/Humanish-LLama3-8B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6497903340913221
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49677096627896544
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1027190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2558724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35815624999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37017952127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HumanLLMs/HumanLLMs_Humanish-Mistral-Nemo-Instruct-2407/de0dbc50-5d26-4005-967c-3dcbde3a1282.json b/leaderboard_data/HFOpenLLMv2/HumanLLMs/HumanLLMs_Humanish-Mistral-Nemo-Instruct-2407/de0dbc50-5d26-4005-967c-3dcbde3a1282.json
deleted file mode 100644
index 1f457a2336abca2d80d31de01f0105ddd9ad0118..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HumanLLMs/HumanLLMs_Humanish-Mistral-Nemo-Instruct-2407/de0dbc50-5d26-4005-967c-3dcbde3a1282.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HumanLLMs_Humanish-Mistral-Nemo-Instruct-2407/1762652579.6451478",
- "retrieved_timestamp": "1762652579.645149",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407",
- "developer": "HumanLLMs",
- "inference_platform": "unknown",
- "id": "HumanLLMs/Humanish-Mistral-Nemo-Instruct-2407"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5451269298793867
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5261780772532613
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13670694864048338
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39676041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35206117021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/HumanLLMs/HumanLLMs_Humanish-Qwen2.5-7B-Instruct/df720663-5e82-4de7-9a19-88287bb5f56a.json b/leaderboard_data/HFOpenLLMv2/HumanLLMs/HumanLLMs_Humanish-Qwen2.5-7B-Instruct/df720663-5e82-4de7-9a19-88287bb5f56a.json
deleted file mode 100644
index 0e791bf417c3c55ac7cde89209a1ff64c326a9de..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/HumanLLMs/HumanLLMs_Humanish-Qwen2.5-7B-Instruct/df720663-5e82-4de7-9a19-88287bb5f56a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HumanLLMs_Humanish-Qwen2.5-7B-Instruct/1762652579.645365",
- "retrieved_timestamp": "1762652579.645366",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HumanLLMs/Humanish-Qwen2.5-7B-Instruct",
- "developer": "HumanLLMs",
- "inference_platform": "unknown",
- "id": "HumanLLMs/Humanish-Qwen2.5-7B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7284250233824031
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5363681457807072
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3980625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4398271276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3-1/23b6bf8e-c79a-4620-9e15-2742f45130af.json b/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3-1/23b6bf8e-c79a-4620-9e15-2742f45130af.json
deleted file mode 100644
index f4372cd850e24401b523058e8ff026e196c3e1b6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3-1/23b6bf8e-c79a-4620-9e15-2742f45130af.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3-1/1762652579.6473012",
- "retrieved_timestamp": "1762652579.647302",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Intel/neural-chat-7b-v3-1",
- "developer": "Intel",
- "inference_platform": "unknown",
- "id": "Intel/neural-chat-7b-v3-1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4686897432146704
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5051565464054848
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.035498489425981876
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49789583333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2677859042553192
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3-2/f8842523-53de-4197-9cf4-979780cbe127.json b/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3-2/f8842523-53de-4197-9cf4-979780cbe127.json
deleted file mode 100644
index 08e7574a2b012cb1913a4f9c609f7eb1b17c2aae..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3-2/f8842523-53de-4197-9cf4-979780cbe127.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3-2/1762652579.647583",
- "retrieved_timestamp": "1762652579.647584",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Intel/neural-chat-7b-v3-2",
- "developer": "Intel",
- "inference_platform": "unknown",
- "id": "Intel/neural-chat-7b-v3-2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4988397452093778
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5032226831964403
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04758308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48952083333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26670545212765956
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3-3/0bec0f9a-863b-42f5-96eb-7263eb1c8a61.json b/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3-3/0bec0f9a-863b-42f5-96eb-7263eb1c8a61.json
deleted file mode 100644
index 55271e9a4bbc16a1644219f4fe888e2bbdd9174d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3-3/0bec0f9a-863b-42f5-96eb-7263eb1c8a61.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3-3/1762652579.6477928",
- "retrieved_timestamp": "1762652579.647794",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Intel/neural-chat-7b-v3-3",
- "developer": "Intel",
- "inference_platform": "unknown",
- "id": "Intel/neural-chat-7b-v3-3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4762585495374495
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48766180524289693
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04078549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4859583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2624667553191489
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3/617dbd41-3ca3-46d8-8fd2-491d6be39554.json b/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3/617dbd41-3ca3-46d8-8fd2-491d6be39554.json
deleted file mode 100644
index 84cbcc05bc5a969af323fc811eda5eb57a0725e2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Intel/Intel_neural-chat-7b-v3/617dbd41-3ca3-46d8-8fd2-491d6be39554.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Intel_neural-chat-7b-v3/1762652579.646828",
- "retrieved_timestamp": "1762652579.6468291",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Intel/neural-chat-7b-v3",
- "developer": "Intel",
- "inference_platform": "unknown",
- "id": "Intel/neural-chat-7b-v3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27779735546128714
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5048316221363103
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02945619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5054895833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26986369680851063
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Invalid-Null/Invalid-Null_PeiYangMe-0.5/c645a252-366a-4890-a16b-bf687bfbb593.json b/leaderboard_data/HFOpenLLMv2/Invalid-Null/Invalid-Null_PeiYangMe-0.5/c645a252-366a-4890-a16b-bf687bfbb593.json
deleted file mode 100644
index 7bbd2573fa69babfb73405d1a5f408249d636044..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Invalid-Null/Invalid-Null_PeiYangMe-0.5/c645a252-366a-4890-a16b-bf687bfbb593.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Invalid-Null_PeiYangMe-0.5/1762652579.648252",
- "retrieved_timestamp": "1762652579.648252",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Invalid-Null/PeiYangMe-0.5",
- "developer": "Invalid-Null",
- "inference_platform": "unknown",
- "id": "Invalid-Null/PeiYangMe-0.5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14088507382633633
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27907748194216614
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24412751677852348
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37381249999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11087101063829788
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.061
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Invalid-Null/Invalid-Null_PeiYangMe-0.7/294c1745-38cb-4b1e-aae6-e2878ab9065a.json b/leaderboard_data/HFOpenLLMv2/Invalid-Null/Invalid-Null_PeiYangMe-0.7/294c1745-38cb-4b1e-aae6-e2878ab9065a.json
deleted file mode 100644
index 0faf507c26620ef798faa1e8d69c81c3b20d0bbc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Invalid-Null/Invalid-Null_PeiYangMe-0.7/294c1745-38cb-4b1e-aae6-e2878ab9065a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Invalid-Null_PeiYangMe-0.7/1762652579.648521",
- "retrieved_timestamp": "1762652579.648522",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Invalid-Null/PeiYangMe-0.7",
- "developer": "Invalid-Null",
- "inference_platform": "unknown",
- "id": "Invalid-Null/PeiYangMe-0.7"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1491032682172192
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30275310145886614
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.011329305135951661
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2332214765100671
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38571874999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11012300531914894
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.061
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_JOSIEv4o-8b-stage1-v4/e8bdfeef-9795-4b00-adec-6ac41c6718f7.json b/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_JOSIEv4o-8b-stage1-v4/e8bdfeef-9795-4b00-adec-6ac41c6718f7.json
deleted file mode 100644
index bffaabc96994fc1eea9c3c0e3951f19a9cfae5d1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_JOSIEv4o-8b-stage1-v4/e8bdfeef-9795-4b00-adec-6ac41c6718f7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Isaak-Carter_JOSIEv4o-8b-stage1-v4/1762652579.648735",
- "retrieved_timestamp": "1762652579.648736",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Isaak-Carter/JOSIEv4o-8b-stage1-v4",
- "developer": "Isaak-Carter",
- "inference_platform": "unknown",
- "id": "Isaak-Carter/JOSIEv4o-8b-stage1-v4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2552660274737696
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4724973116620121
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.052870090634441085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3654375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3316156914893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_JOSIEv4o-8b-stage1-v4/f28b57ba-103a-41bb-93b0-7b25fd155351.json b/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_JOSIEv4o-8b-stage1-v4/f28b57ba-103a-41bb-93b0-7b25fd155351.json
deleted file mode 100644
index d854eea01b97d6f4c64d11d6a2d0530a0ff151ab..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_JOSIEv4o-8b-stage1-v4/f28b57ba-103a-41bb-93b0-7b25fd155351.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Isaak-Carter_JOSIEv4o-8b-stage1-v4/1762652579.6489909",
- "retrieved_timestamp": "1762652579.648992",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Isaak-Carter/JOSIEv4o-8b-stage1-v4",
- "developer": "Isaak-Carter",
- "inference_platform": "unknown",
- "id": "Isaak-Carter/JOSIEv4o-8b-stage1-v4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2476972211509905
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4758066295235124
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.045317220543806644
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3641041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32920545212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/817eb9e1-bd7d-4033-b0ea-bc7df58dc087.json b/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/817eb9e1-bd7d-4033-b0ea-bc7df58dc087.json
deleted file mode 100644
index 7b3a4d42769219d286d55fa620867a79b2723ad3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/817eb9e1-bd7d-4033-b0ea-bc7df58dc087.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated-v2/1762652579.649409",
- "retrieved_timestamp": "1762652579.64941",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2",
- "developer": "Isaak-Carter",
- "inference_platform": "unknown",
- "id": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7841039552830933
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5310923599182072
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47205438066465255
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43539583333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4128158244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated/2013b3a9-3644-4f66-9941-b5d2ba6e7b81.json b/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated/2013b3a9-3644-4f66-9941-b5d2ba6e7b81.json
deleted file mode 100644
index 9ba116a2c287b664ceb0e4a4923ffa7a7de8ec54..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Isaak-Carter/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated/2013b3a9-3644-4f66-9941-b5d2ba6e7b81.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Isaak-Carter_Josiefied-Qwen2.5-7B-Instruct-abliterated/1762652579.6491818",
- "retrieved_timestamp": "1762652579.649183",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated",
- "developer": "Isaak-Carter",
- "inference_platform": "unknown",
- "id": "Isaak-Carter/Josiefied-Qwen2.5-7B-Instruct-abliterated"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7317473193349202
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5396376284460921
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49244712990936557
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4086666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4276097074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/J-LAB/J-LAB_Thynk_orpo/3565fba3-e63d-49f8-9e8f-deef83531eb9.json b/leaderboard_data/HFOpenLLMv2/J-LAB/J-LAB_Thynk_orpo/3565fba3-e63d-49f8-9e8f-deef83531eb9.json
deleted file mode 100644
index c67d593c23341ee9b9900ca386bd0baca4386a3e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/J-LAB/J-LAB_Thynk_orpo/3565fba3-e63d-49f8-9e8f-deef83531eb9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/J-LAB_Thynk_orpo/1762652579.649622",
- "retrieved_timestamp": "1762652579.6496232",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "J-LAB/Thynk_orpo",
- "developer": "J-LAB",
- "inference_platform": "unknown",
- "id": "J-LAB/Thynk_orpo"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21017788357114678
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44631138778709606
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14803625377643503
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45147916666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32313829787234044
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Casual-Magnum-34B/0b9358f8-1e27-448f-9932-1f2c6feac036.json b/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Casual-Magnum-34B/0b9358f8-1e27-448f-9932-1f2c6feac036.json
deleted file mode 100644
index aa365b17d53b4e8979c2759fc0f16f398479bc4d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Casual-Magnum-34B/0b9358f8-1e27-448f-9932-1f2c6feac036.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Jacoby746_Casual-Magnum-34B/1762652579.65033",
- "retrieved_timestamp": "1762652579.6503308",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Jacoby746/Casual-Magnum-34B",
- "developer": "Jacoby746",
- "inference_platform": "unknown",
- "id": "Jacoby746/Casual-Magnum-34B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19301675110927893
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6032046880542974
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09214501510574018
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3724832214765101
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4077604166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5183676861702128
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Inf-Silent-Kunoichi-v0.1-2x7B/d1fa6abf-be2b-4ea6-bcbe-066ac37aa54f.json b/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Inf-Silent-Kunoichi-v0.1-2x7B/d1fa6abf-be2b-4ea6-bcbe-066ac37aa54f.json
deleted file mode 100644
index c0b5bf2af747f0478f07fea6730f920107fc6bb2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Inf-Silent-Kunoichi-v0.1-2x7B/d1fa6abf-be2b-4ea6-bcbe-066ac37aa54f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Jacoby746_Inf-Silent-Kunoichi-v0.1-2x7B/1762652579.6505952",
- "retrieved_timestamp": "1762652579.6505961",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B",
- "developer": "Jacoby746",
- "inference_platform": "unknown",
- "id": "Jacoby746/Inf-Silent-Kunoichi-v0.1-2x7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38798166642286913
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.518546209727402
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07099697885196375
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42804166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3271276595744681
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 12.879
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Inf-Silent-Kunoichi-v0.2-2x7B/f611991b-11c1-4232-bc63-8cf2942605ae.json b/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Inf-Silent-Kunoichi-v0.2-2x7B/f611991b-11c1-4232-bc63-8cf2942605ae.json
deleted file mode 100644
index 3989ce32015effcf7860d920a6e93cc897661658..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Inf-Silent-Kunoichi-v0.2-2x7B/f611991b-11c1-4232-bc63-8cf2942605ae.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Jacoby746_Inf-Silent-Kunoichi-v0.2-2x7B/1762652579.650832",
- "retrieved_timestamp": "1762652579.650833",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B",
- "developer": "Jacoby746",
- "inference_platform": "unknown",
- "id": "Jacoby746/Inf-Silent-Kunoichi-v0.2-2x7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3636019095998617
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5209417299963208
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06268882175226587
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43197916666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32721077127659576
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 12.879
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Athena-4x7B/27d9d5c2-39d8-45e5-9614-a343144f05d8.json b/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Athena-4x7B/27d9d5c2-39d8-45e5-9614-a343144f05d8.json
deleted file mode 100644
index af4a11e7b8faef1a6e9ed7179a9e3df9c700ebdc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Athena-4x7B/27d9d5c2-39d8-45e5-9614-a343144f05d8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Athena-4x7B/1762652579.651071",
- "retrieved_timestamp": "1762652579.651072",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Jacoby746/Proto-Athena-4x7B",
- "developer": "Jacoby746",
- "inference_platform": "unknown",
- "id": "Jacoby746/Proto-Athena-4x7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37029636918930664
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5106547638742905
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0649546827794562
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43477083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32064494680851063
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 24.154
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Athena-v0.2-4x7B/060feab1-4ce6-44a9-8ae2-c06468dd4dc9.json b/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Athena-v0.2-4x7B/060feab1-4ce6-44a9-8ae2-c06468dd4dc9.json
deleted file mode 100644
index b9d97bcdbb02fecba1c0c60ba081908b0106f121..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Athena-v0.2-4x7B/060feab1-4ce6-44a9-8ae2-c06468dd4dc9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Athena-v0.2-4x7B/1762652579.651291",
- "retrieved_timestamp": "1762652579.6512918",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Jacoby746/Proto-Athena-v0.2-4x7B",
- "developer": "Jacoby746",
- "inference_platform": "unknown",
- "id": "Jacoby746/Proto-Athena-v0.2-4x7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37524213531208306
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5067731005424964
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0634441087613293
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42128125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3197307180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 24.154
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Harpy-Blazing-Light-v0.1-2x7B/f7455f30-e04e-4bc6-9d71-e33272d4577c.json b/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Harpy-Blazing-Light-v0.1-2x7B/f7455f30-e04e-4bc6-9d71-e33272d4577c.json
deleted file mode 100644
index dac4e9f499b6bc4c46c07642d373c96b62d29dea..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Harpy-Blazing-Light-v0.1-2x7B/f7455f30-e04e-4bc6-9d71-e33272d4577c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Harpy-Blazing-Light-v0.1-2x7B/1762652579.651509",
- "retrieved_timestamp": "1762652579.65151",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B",
- "developer": "Jacoby746",
- "inference_platform": "unknown",
- "id": "Jacoby746/Proto-Harpy-Blazing-Light-v0.1-2x7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4904719477652628
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5186849053052595
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07477341389728097
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44496874999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33011968085106386
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 12.879
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Harpy-Spark-v0.1-7B/420cf07c-f043-49db-a62d-91e0c21aff2f.json b/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Harpy-Spark-v0.1-7B/420cf07c-f043-49db-a62d-91e0c21aff2f.json
deleted file mode 100644
index 0d64a109d31e646433b0af2a899c49ab26fc5d3d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Jacoby746/Jacoby746_Proto-Harpy-Spark-v0.1-7B/420cf07c-f043-49db-a62d-91e0c21aff2f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Jacoby746_Proto-Harpy-Spark-v0.1-7B/1762652579.651721",
- "retrieved_timestamp": "1762652579.651722",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Jacoby746/Proto-Harpy-Spark-v0.1-7B",
- "developer": "Jacoby746",
- "inference_platform": "unknown",
- "id": "Jacoby746/Proto-Harpy-Spark-v0.1-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43326928106313467
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4735771808296548
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.061933534743202415
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43166666666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30693151595744683
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen-0.5B-DPO-1epoch/7da8cc7e-791f-420d-9004-b29ddf54e381.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen-0.5B-DPO-1epoch/7da8cc7e-791f-420d-9004-b29ddf54e381.json
deleted file mode 100644
index ea513cdb8422a609d6f06055482171af1f1cbc9f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen-0.5B-DPO-1epoch/7da8cc7e-791f-420d-9004-b29ddf54e381.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-DPO-1epoch/1762652579.651926",
- "retrieved_timestamp": "1762652579.651926",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen-0.5B-DPO-1epoch",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen-0.5B-DPO-1epoch"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26473313031644924
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31907502434278595
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.028700906344410877
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2525167785234899
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33517708333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15575132978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen-0.5B-DPO-5epoch/42960491-549f-42bb-9669-5231ca0c436b.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen-0.5B-DPO-5epoch/42960491-549f-42bb-9669-5231ca0c436b.json
deleted file mode 100644
index 5ed4c54e5c6b5544a087a650856d0d87526d3948..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen-0.5B-DPO-5epoch/42960491-549f-42bb-9669-5231ca0c436b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-DPO-5epoch/1762652579.65218",
- "retrieved_timestamp": "1762652579.652181",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen-0.5B-DPO-5epoch",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen-0.5B-DPO-5epoch"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25701472094043804
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3112109544868782
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04003021148036254
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24328859060402686
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33796875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15325797872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/46c6ab7f-33a0-4e72-9a63-b24da3f9c4d6.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/46c6ab7f-33a0-4e72-9a63-b24da3f9c4d6.json
deleted file mode 100644
index c549549686e65e871b7b6960f638dfed7ba11f69..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/46c6ab7f-33a0-4e72-9a63-b24da3f9c4d6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1/1762652579.653574",
- "retrieved_timestamp": "1762652579.653575",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-DPO-1epoch_v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24687274210206694
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3260313037664168
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06495468277945618
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34336458333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1574966755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/1ff4251b-d01a-4ced-8868-776210e1ecb6.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/1ff4251b-d01a-4ced-8868-776210e1ecb6.json
deleted file mode 100644
index 780b927eeff17fc43315399cc80c737949bd8426..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/1ff4251b-d01a-4ced-8868-776210e1ecb6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1/1762652579.6538298",
- "retrieved_timestamp": "1762652579.6538298",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-IRPO-1epoch_v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2605863553150086
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3308028437367363
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04984894259818731
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3288229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16256648936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/c3c5cb61-3c4f-4796-9d3c-493618db0f91.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/c3c5cb61-3c4f-4796-9d3c-493618db0f91.json
deleted file mode 100644
index 31939aed089275eb7157b253e38d5879ead6590e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/c3c5cb61-3c4f-4796-9d3c-493618db0f91.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1/1762652579.654063",
- "retrieved_timestamp": "1762652579.6540642",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT-MDPO-1epoch_v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2529178136234081
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3261949089625076
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05664652567975831
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.330125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15757978723404256
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT/48e6f9aa-5034-4653-8832-b0a16bf01079.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT/48e6f9aa-5034-4653-8832-b0a16bf01079.json
deleted file mode 100644
index b4c1ff825f7fe504791a994b38a90b60c14a1bb4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-Instruct-SFT/48e6f9aa-5034-4653-8832-b0a16bf01079.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-Instruct-SFT/1762652579.65331",
- "retrieved_timestamp": "1762652579.653311",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-Instruct-SFT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27677340567472086
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3253697801563151
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03927492447129909
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33415625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15201130319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-2ep/00efca13-0d04-4700-a90f-bd621a971555.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-2ep/00efca13-0d04-4700-a90f-bd621a971555.json
deleted file mode 100644
index bf9ed72a2863a29a99b87375d8c6c43f64ff92c2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-2ep/00efca13-0d04-4700-a90f-bd621a971555.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-2ep/1762652579.654743",
- "retrieved_timestamp": "1762652579.6547441",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-2ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2140498322229462
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3172227797719337
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.026435045317220542
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24664429530201343
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34727083333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15367353723404256
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-3ep/f357f4eb-1837-4ab2-ad4b-9cc8a9054517.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-3ep/f357f4eb-1837-4ab2-ad4b-9cc8a9054517.json
deleted file mode 100644
index 1966e91ff858ea2ec63fdae7d498522fe3fe5906..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-3ep/f357f4eb-1837-4ab2-ad4b-9cc8a9054517.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-3ep/1762652579.6549618",
- "retrieved_timestamp": "1762652579.654963",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-3ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22573992561957826
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3064261556890236
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.026435045317220542
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2483221476510067
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36606249999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15317486702127658
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-5ep/206c756e-1edc-491f-9f86-7e00c7ab7085.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-5ep/206c756e-1edc-491f-9f86-7e00c7ab7085.json
deleted file mode 100644
index 81e0cdb9275f3090704c6c31599d0d2fb9e04cb0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-5ep/206c756e-1edc-491f-9f86-7e00c7ab7085.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4-5ep/1762652579.655172",
- "retrieved_timestamp": "1762652579.655173",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4-5ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19868726091215752
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31044747322019184
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.019637462235649546
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3406666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15575132978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4/7d591ed9-5802-43a3-bb38-ec45b69adb08.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4/7d591ed9-5802-43a3-bb38-ec45b69adb08.json
deleted file mode 100644
index d90084860acfa23d7d0a2c34faef2c6b569dec74..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-4/7d591ed9-5802-43a3-bb38-ec45b69adb08.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-4/1762652579.654527",
- "retrieved_timestamp": "1762652579.654527",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2019596891802639
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3017092819749249
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0188821752265861
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25083892617449666
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3446354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16190159574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-2ep/fde79985-6832-4315-8650-fdcf9ad68087.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-2ep/fde79985-6832-4315-8650-fdcf9ad68087.json
deleted file mode 100644
index df967183fadbfd6b8fc08530e58a0253dbd420d8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-2ep/fde79985-6832-4315-8650-fdcf9ad68087.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-2ep/1762652579.655605",
- "retrieved_timestamp": "1762652579.655606",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-2ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19706379074189817
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3224699194774388
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.052870090634441085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3367604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1651429521276596
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-3ep/aef8fd41-ac51-4fb5-b8ae-78ebca9b4215.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-3ep/aef8fd41-ac51-4fb5-b8ae-78ebca9b4215.json
deleted file mode 100644
index 5ccf89b42c945752a3c3790999c367670f0bea6b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-3ep/aef8fd41-ac51-4fb5-b8ae-78ebca9b4215.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-3ep/1762652579.655815",
- "retrieved_timestamp": "1762652579.6558158",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-3ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2241164554493189
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32468117082421427
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05362537764350453
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3353333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16888297872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-5ep/b5cdb9c2-d81a-4e0b-817a-3e101d122e7a.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-5ep/b5cdb9c2-d81a-4e0b-817a-3e101d122e7a.json
deleted file mode 100644
index 33f1c3627536969ce3a8d3c373f73948fcafa8a6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-5ep/b5cdb9c2-d81a-4e0b-817a-3e101d122e7a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5-5ep/1762652579.656047",
- "retrieved_timestamp": "1762652579.656048",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5-5ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22918744486850445
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3259343389530942
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05211480362537765
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3235208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16879986702127658
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5/3eac4497-66af-4fc6-bf89-459631e4a418.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5/3eac4497-66af-4fc6-bf89-459631e4a418.json
deleted file mode 100644
index ee3093249cc664e698b7c4c514e9a95c6713d317..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-1e-5/3eac4497-66af-4fc6-bf89-459631e4a418.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-1e-5/1762652579.6553931",
- "retrieved_timestamp": "1762652579.655394",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-1e-5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1985875255433361
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3139860294769257
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0377643504531722
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34603125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1697972074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-2ep/9d58433f-a74c-4345-bd47-a8f2c4e2361e.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-2ep/9d58433f-a74c-4345-bd47-a8f2c4e2361e.json
deleted file mode 100644
index adac30fbeeb0196fc19008768d63fbc99c970fa5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-2ep/9d58433f-a74c-4345-bd47-a8f2c4e2361e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-2ep/1762652579.656457",
- "retrieved_timestamp": "1762652579.656457",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-2ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18307535117931534
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29839616748934167
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.024924471299093656
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2424496644295302
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3567604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1484375
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-3ep/e8109e5c-6276-4935-bfa0-fc969f118d3b.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-3ep/e8109e5c-6276-4935-bfa0-fc969f118d3b.json
deleted file mode 100644
index 66821f8bf4551461b9c846b3318359f0d681ad43..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-3ep/e8109e5c-6276-4935-bfa0-fc969f118d3b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-3ep/1762652579.656671",
- "retrieved_timestamp": "1762652579.656672",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-3ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1989620872617987
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3109875129533253
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015105740181268883
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3449479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14162234042553193
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-5ep/9d6b36c5-c0ec-4ab1-a12b-47efc34ebfc8.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-5ep/9d6b36c5-c0ec-4ab1-a12b-47efc34ebfc8.json
deleted file mode 100644
index 872b240379c2eac54f8cb2c405dcb6819ec58b54..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-5ep/9d6b36c5-c0ec-4ab1-a12b-47efc34ebfc8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4-5ep/1762652579.656877",
- "retrieved_timestamp": "1762652579.656878",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4-5ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18971994308434953
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936418449815176
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01812688821752266
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38739583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13364361702127658
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4/5e307ea5-70da-476a-8d9e-1d488385565f.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4/5e307ea5-70da-476a-8d9e-1d488385565f.json
deleted file mode 100644
index 6e55d96ceda351608949e7ed4dbc1dd4dfeddfdf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-4/5e307ea5-70da-476a-8d9e-1d488385565f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-4/1762652579.656255",
- "retrieved_timestamp": "1762652579.656256",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2034335562972912
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2935549587263229
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02416918429003021
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3434270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14128989361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/343b7db1-8f96-4998-a6fb-5eb0aa1b6b21.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/343b7db1-8f96-4998-a6fb-5eb0aa1b6b21.json
deleted file mode 100644
index b011fb41ead7a2d5026abe0aa129cda90d0fe4ad..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/343b7db1-8f96-4998-a6fb-5eb0aa1b6b21.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam/1762652579.6580968",
- "retrieved_timestamp": "1762652579.658098",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_3e-7-3ep_0alp_5lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24105262924595627
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31671815484837784
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03474320241691843
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.330125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15625
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/bfa11262-d7bd-44b3-8b8b-81013f1e0c24.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/bfa11262-d7bd-44b3-8b8b-81013f1e0c24.json
deleted file mode 100644
index 10ec819145529016434be6807df27764fc56572e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/bfa11262-d7bd-44b3-8b8b-81013f1e0c24.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam/1762652579.658342",
- "retrieved_timestamp": "1762652579.6583428",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-1ep_0alp_5lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23685598656010498
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3260038632940968
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.045317220543806644
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3355208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15699800531914893
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/902849f8-dc58-4e01-ba30-ff95412272d3.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/902849f8-dc58-4e01-ba30-ff95412272d3.json
deleted file mode 100644
index 7a6c3be960dc44660a308b3a8dceb42b12429559..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/902849f8-dc58-4e01-ba30-ff95412272d3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam/1762652579.6585789",
- "retrieved_timestamp": "1762652579.65858",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-2ep_0alp_5lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22623971063444992
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3261540051256346
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03474320241691843
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3408229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15408909574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/4c5cace1-70ce-48f3-aad1-d141924c24de.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/4c5cace1-70ce-48f3-aad1-d141924c24de.json
deleted file mode 100644
index 10973e16c32c896edc716d894de1f4f634ee00e5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/4c5cace1-70ce-48f3-aad1-d141924c24de.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam/1762652579.6588218",
- "retrieved_timestamp": "1762652579.658823",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-6-3ep_0alp_5lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25079455843827714
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3199331515135054
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04078549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33545833333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15550199468085107
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/e42051f2-90f2-4fbe-a4bd-623482abf10f.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/e42051f2-90f2-4fbe-a4bd-623482abf10f.json
deleted file mode 100644
index 63a1ad5574077283d87be6a18fd11573c3fc8d9d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/e42051f2-90f2-4fbe-a4bd-623482abf10f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam/1762652579.6591082",
- "retrieved_timestamp": "1762652579.659109",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-1ep_0alp_5lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.238979241745236
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31816042712158116
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04003021148036254
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33279166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15600066489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/e70423b6-5a7d-4745-b5a3-968f363a3b7a.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/e70423b6-5a7d-4745-b5a3-968f363a3b7a.json
deleted file mode 100644
index eea17ecf3360d1277d2601549c91e168b7ff5520..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/e70423b6-5a7d-4745-b5a3-968f363a3b7a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam/1762652579.6593359",
- "retrieved_timestamp": "1762652579.659337",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-2ep_0alp_5lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2423015376977531
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3154080373582542
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03474320241691843
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33279166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15475398936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/2a7b8fa7-5c16-414b-968e-ec7b06e8143c.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/2a7b8fa7-5c16-414b-968e-ec7b06e8143c.json
deleted file mode 100644
index 2cfbbd2f508052afcd63658bd0a4cca50df5f18b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/2a7b8fa7-5c16-414b-968e-ec7b06e8143c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam/1762652579.6595562",
- "retrieved_timestamp": "1762652579.659557",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPOP_5e-7-3ep_0alp_5lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24932069132124984
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3189717077702392
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04380664652567976
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.334125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15608377659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/dfa1b391-4b18-4ac0-a397-a983070647a7.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/dfa1b391-4b18-4ac0-a397-a983070647a7.json
deleted file mode 100644
index c6c104c7161f07f49f4368d1f4d4b77ec5ab8ae9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/dfa1b391-4b18-4ac0-a397-a983070647a7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam/1762652579.660001",
- "retrieved_timestamp": "1762652579.660005",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2541667220752049
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31671883869615397
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04078549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32885416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15799534574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/96d31674-0011-4621-9131-31b5f6ede223.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/96d31674-0011-4621-9131-31b5f6ede223.json
deleted file mode 100644
index 1e10943dc6d4d5211fda04c99de81bafb88ef97f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/96d31674-0011-4621-9131-31b5f6ede223.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam/1762652579.660342",
- "retrieved_timestamp": "1762652579.660342",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-2ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24507418095098782
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3159533058861391
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04078549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3301875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15608377659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/d8663966-a5f5-40e6-a327-1255f7c3395f.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/d8663966-a5f5-40e6-a327-1255f7c3395f.json
deleted file mode 100644
index 665c31d06a2ff1b731ad68cca1507c89b5261103..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/d8663966-a5f5-40e6-a327-1255f7c3395f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam/1762652579.6605709",
- "retrieved_timestamp": "1762652579.6605718",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-6-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25574032456105356
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31419826948787827
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04003021148036254
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3315208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1574966755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/a1fadf30-c543-4b73-bf28-0cb9cb2fc91f.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/a1fadf30-c543-4b73-bf28-0cb9cb2fc91f.json
deleted file mode 100644
index 934cd9bb3a4d34ea62953851aeecaeeb7dd18030..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/a1fadf30-c543-4b73-bf28-0cb9cb2fc91f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam/1762652579.660821",
- "retrieved_timestamp": "1762652579.660822",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-2ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26053648763059795
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3166968072745491
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03625377643504532
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.334125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15766289893617022
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/57b69bd0-73f6-42e0-bd9e-984bb1e6a553.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/57b69bd0-73f6-42e0-bd9e-984bb1e6a553.json
deleted file mode 100644
index 75b1ca096c6d75c5b6addc165c5bd85e75ca6d4d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/57b69bd0-73f6-42e0-bd9e-984bb1e6a553.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam/1762652579.661046",
- "retrieved_timestamp": "1762652579.661047",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_1e-7-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25781371206177384
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31732037273750646
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.035498489425981876
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32879166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1583277925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/93597efa-6da8-4074-8049-6ec66f499cbf.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/93597efa-6da8-4074-8049-6ec66f499cbf.json
deleted file mode 100644
index 08b3069adbfee31c0448fe9ef40fd62e85911baf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/93597efa-6da8-4074-8049-6ec66f499cbf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam/1762652579.661258",
- "retrieved_timestamp": "1762652579.661259",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23353369060758786
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3197619098572027
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03851963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348994
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32755208333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1580784574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/00a5dc4a-6ffb-4e6a-9547-416ff29e0ded.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/00a5dc4a-6ffb-4e6a-9547-416ff29e0ded.json
deleted file mode 100644
index 2d14bdae9cceb4ff005c9f2ce2528e6958cb80ea..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/00a5dc4a-6ffb-4e6a-9547-416ff29e0ded.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam/1762652579.6614761",
- "retrieved_timestamp": "1762652579.6614761",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-2ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24719743613611883
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32262707839652854
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05060422960725076
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32621875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15375664893617022
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/13cf92c4-fbeb-445a-85d6-bf71ce2e68c9.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/13cf92c4-fbeb-445a-85d6-bf71ce2e68c9.json
deleted file mode 100644
index f3e8872679f5575bdbfb948c88541e48f3113ec9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/13cf92c4-fbeb-445a-85d6-bf71ce2e68c9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam/1762652579.661691",
- "retrieved_timestamp": "1762652579.661692",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_2e-6-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2474223948013493
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32291208173140107
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04154078549848943
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32748958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15392287234042554
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/14a173b6-4d56-4d22-a888-57ea46d72e67.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/14a173b6-4d56-4d22-a888-57ea46d72e67.json
deleted file mode 100644
index 70d5aec592e87582c90c32bd5cc6de8892e9a674..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/14a173b6-4d56-4d22-a888-57ea46d72e67.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam/1762652579.6619039",
- "retrieved_timestamp": "1762652579.6619048",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24027801788144343
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32453683161596314
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04305135951661632
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28187919463087246
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32621875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1573304521276596
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/f46cc7cb-27e8-4723-9ecf-cbeef9789b25.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/f46cc7cb-27e8-4723-9ecf-cbeef9789b25.json
deleted file mode 100644
index 661a5020640221096403b8f805b96818a11d3b6a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/f46cc7cb-27e8-4723-9ecf-cbeef9789b25.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam/1762652579.662116",
- "retrieved_timestamp": "1762652579.662117",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-2ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23680611887569425
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3224293761524927
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04607250755287009
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33548958333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15159574468085107
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/de200bef-71a2-4efb-bc34-02f69385b636.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/de200bef-71a2-4efb-bc34-02f69385b636.json
deleted file mode 100644
index e2c666a3c35324671f874cc15458065636aa59ac..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/de200bef-71a2-4efb-bc34-02f69385b636.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam/1762652579.662327",
- "retrieved_timestamp": "1762652579.662327",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-6-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23718068059415687
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32477052921998556
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04758308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3394270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1550033244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/7ed1ff6a-fe4d-4f78-bbc6-c5e64a7fbfc1.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/7ed1ff6a-fe4d-4f78-bbc6-c5e64a7fbfc1.json
deleted file mode 100644
index 6dbffccf7f6cd5176b97a52261dbf23958b5a086..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/7ed1ff6a-fe4d-4f78-bbc6-c5e64a7fbfc1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam/1762652579.6625469",
- "retrieved_timestamp": "1762652579.662548",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_3e-7-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24992021170494289
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31806007750183346
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04154078549848943
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3288229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15741356382978725
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/82d38084-32b1-4224-810c-b66dd337b3fe.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/82d38084-32b1-4224-810c-b66dd337b3fe.json
deleted file mode 100644
index 44d7b9796c9ebbf8cd58a36518a5716dad2f555c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/82d38084-32b1-4224-810c-b66dd337b3fe.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam/1762652579.662755",
- "retrieved_timestamp": "1762652579.662755",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23810489501190177
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32421844512358233
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04984894259818731
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3328229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15724734042553193
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/972e0d76-63bb-431b-9d9b-68dd6b738447.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/972e0d76-63bb-431b-9d9b-68dd6b738447.json
deleted file mode 100644
index ed1719615b785fd08bd70b88a0655c992923268b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/972e0d76-63bb-431b-9d9b-68dd6b738447.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam/1762652579.662969",
- "retrieved_timestamp": "1762652579.662969",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-2ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2420765790325226
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3224798177796032
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04003021148036254
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3408229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14960106382978725
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/7337bc31-54b6-43b9-bb26-63f2273ffc7e.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/7337bc31-54b6-43b9-bb26-63f2273ffc7e.json
deleted file mode 100644
index 7304968d3dba551508faa3f2eb70cc7502ab368c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/7337bc31-54b6-43b9-bb26-63f2273ffc7e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam/1762652579.663178",
- "retrieved_timestamp": "1762652579.663179",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-6-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23805502732749106
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32652003776870003
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0445619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34079166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14985039893617022
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/c2e14e90-6c18-4a9f-9d68-a9d98960dd32.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/c2e14e90-6c18-4a9f-9d68-a9d98960dd32.json
deleted file mode 100644
index 83441688d5e47fa989aad53e485e1dc89c4d9252..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/c2e14e90-6c18-4a9f-9d68-a9d98960dd32.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam/1762652579.663386",
- "retrieved_timestamp": "1762652579.663386",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25264298727376694
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3176911636441555
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04380664652567976
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33415625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15724734042553193
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/972d45c5-acd1-4e54-8310-9ff56c5fb061.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/972d45c5-acd1-4e54-8310-9ff56c5fb061.json
deleted file mode 100644
index 91f3781a85b22153b7b0e98882eb4a86c543c935..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/972d45c5-acd1-4e54-8310-9ff56c5fb061.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam/1762652579.6636329",
- "retrieved_timestamp": "1762652579.6636338",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_2ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24572356901909154
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.316045450978746
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0445619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33015625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15716422872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/2faf738f-64f4-4e14-8011-9e00a4e2dd6a.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/2faf738f-64f4-4e14-8011-9e00a4e2dd6a.json
deleted file mode 100644
index df7b41e16a5f46abd4b64db02adbdaa08c890d32..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/2faf738f-64f4-4e14-8011-9e00a4e2dd6a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam/1762652579.663875",
- "retrieved_timestamp": "1762652579.663876",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_5e-7_3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2441998342176536
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3193544697854515
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04833836858006042
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33148958333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1566655585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/15b28d99-e02a-4021-899b-adef87dfe96a.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/15b28d99-e02a-4021-899b-adef87dfe96a.json
deleted file mode 100644
index 68a2f42856f1a84ebff713f8a88e4a30cd175d08..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/15b28d99-e02a-4021-899b-adef87dfe96a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam/1762652579.6641018",
- "retrieved_timestamp": "1762652579.664103",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26036139664977814
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31784656431310543
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.035498489425981876
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3288229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15674867021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/b643171e-adaa-4f6e-8860-542950810578.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/b643171e-adaa-4f6e-8860-542950810578.json
deleted file mode 100644
index 36b43cb3e37bc6cc42e8e9bed3856503cb8d1b99..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/b643171e-adaa-4f6e-8860-542950810578.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam/1762652579.664332",
- "retrieved_timestamp": "1762652579.664333",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_2ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24899599728719796
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3172899997448431
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03927492447129909
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3301875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15691489361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/a26204c0-90c5-44fd-8814-d69c6e4f4585.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/a26204c0-90c5-44fd-8814-d69c6e4f4585.json
deleted file mode 100644
index 8d12dd0b0389ac52ac15ba72309dff0d9f23c059..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/a26204c0-90c5-44fd-8814-d69c6e4f4585.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam/1762652579.6645608",
- "retrieved_timestamp": "1762652579.664562",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-DPO_7e-7_3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26036139664977814
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3149566664115098
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0377643504531722
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3341875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15658244680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/bc45fc30-c472-471a-b0c8-f68b9397d844.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/bc45fc30-c472-471a-b0c8-f68b9397d844.json
deleted file mode 100644
index 71ca238e09c3d791932fd4dbceb714974ddac9cb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/bc45fc30-c472-471a-b0c8-f68b9397d844.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam/1762652579.664829",
- "retrieved_timestamp": "1762652579.66483",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-1ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2550410688085391
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3211026993947845
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04909365558912387
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32876041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15708111702127658
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/dff1ec0f-99a6-493d-9f2c-a6a523455b7e.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/dff1ec0f-99a6-493d-9f2c-a6a523455b7e.json
deleted file mode 100644
index a44d8dc1f10f5a386e0868fc7291b69290a3422c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/dff1ec0f-99a6-493d-9f2c-a6a523455b7e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam/1762652579.665046",
- "retrieved_timestamp": "1762652579.665047",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-2ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24779695651981187
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3197773660515741
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04229607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33145833333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15866023936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/a6385d82-407e-44b2-9148-9cbf8f353557.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/a6385d82-407e-44b2-9148-9cbf8f353557.json
deleted file mode 100644
index c31a795660f47dc096f56e7087d7d27ad97c3a46..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/a6385d82-407e-44b2-9148-9cbf8f353557.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam/1762652579.6652648",
- "retrieved_timestamp": "1762652579.665266",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_1e-7-3ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24747226248576
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32246983072126806
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04003021148036254
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.330125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15558510638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/17fb5411-3dc6-44b7-971b-8a080ed93de0.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/17fb5411-3dc6-44b7-971b-8a080ed93de0.json
deleted file mode 100644
index 80e4a73e84d9931904cacd47e7f13ffad44e3195..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/17fb5411-3dc6-44b7-971b-8a080ed93de0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam/1762652579.665471",
- "retrieved_timestamp": "1762652579.665472",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_3e-7-3ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2590127528291599
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3185132309797721
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03625377643504532
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3275208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15857712765957446
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/670b89a5-2a83-480e-a33b-6903609a10dc.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/670b89a5-2a83-480e-a33b-6903609a10dc.json
deleted file mode 100644
index def76622b23ec0279f5318dd7a2780bc588e3cfd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/670b89a5-2a83-480e-a33b-6903609a10dc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam/1762652579.665683",
- "retrieved_timestamp": "1762652579.665684",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-1ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23233464984020177
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179474145066817
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.045317220543806644
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15475398936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/e660922f-847b-4993-91a4-b96809ff1e85.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/e660922f-847b-4993-91a4-b96809ff1e85.json
deleted file mode 100644
index 6c72674f847d3bc10ef647bd2f9e5ffa58b9104d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/e660922f-847b-4993-91a4-b96809ff1e85.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam/1762652579.665889",
- "retrieved_timestamp": "1762652579.66589",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-2ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23151017079127825
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3259705145690442
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04154078549848943
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3383125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15209441489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/41d18fa1-d19e-47cf-8fec-b04725ff097f.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/41d18fa1-d19e-47cf-8fec-b04725ff097f.json
deleted file mode 100644
index 99799658c8418e4647d70bbbd6aa5826f63fff4b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/41d18fa1-d19e-47cf-8fec-b04725ff097f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam/1762652579.666097",
- "retrieved_timestamp": "1762652579.6660979",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-6-3ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2298368329366082
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33204616486918276
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04305135951661632
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33288541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15674867021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/122a997d-f452-4511-96f3-f31ecb5d8d7b.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/122a997d-f452-4511-96f3-f31ecb5d8d7b.json
deleted file mode 100644
index 293f5705039bf91f26c64625c34c8db71922f786..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/122a997d-f452-4511-96f3-f31ecb5d8d7b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam/1762652579.666312",
- "retrieved_timestamp": "1762652579.666313",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-1ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24687274210206694
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3178544697854515
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04154078549848943
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33015625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1574966755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/c0d7514b-6809-49d7-9193-38e9c9ad03be.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/c0d7514b-6809-49d7-9193-38e9c9ad03be.json
deleted file mode 100644
index 56db8cb812f80294cbf00f2d533da7357859e459..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/c0d7514b-6809-49d7-9193-38e9c9ad03be.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam/1762652579.666527",
- "retrieved_timestamp": "1762652579.666527",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-2ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2520434668900739
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3167822100533442
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03700906344410876
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3328229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15757978723404256
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/923f6446-f9fb-47ae-b585-ac131d75c107.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/923f6446-f9fb-47ae-b585-ac131d75c107.json
deleted file mode 100644
index 78987f399f082c4a0579d4ab7477484d858ad4cd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/923f6446-f9fb-47ae-b585-ac131d75c107.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam/1762652579.6667368",
- "retrieved_timestamp": "1762652579.666738",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-IRPO_5e-7-3ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2665815591519391
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3190675981811982
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03474320241691843
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32885416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1566655585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/da330322-f144-44bb-833a-7b92c11f3888.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/da330322-f144-44bb-833a-7b92c11f3888.json
deleted file mode 100644
index 4de917bf9d2b31894a45848a4c036cc05b9b4f34..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/da330322-f144-44bb-833a-7b92c11f3888.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam/1762652579.667231",
- "retrieved_timestamp": "1762652579.667236",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24992021170494289
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31779941873624934
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03700906344410876
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.334125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15625
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/10014f98-cae2-435b-b6e7-17064bb079a5.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/10014f98-cae2-435b-b6e7-17064bb079a5.json
deleted file mode 100644
index 3d3080b24ddc0e0062e797811ce73130b658c5ae..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/10014f98-cae2-435b-b6e7-17064bb079a5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam/1762652579.6676302",
- "retrieved_timestamp": "1762652579.6676311",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-2ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24170201731406002
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3178391594145879
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04003021148036254
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33279166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1574966755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/c6d4f510-abc8-4524-99b0-e6d98c6e9aa9.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/c6d4f510-abc8-4524-99b0-e6d98c6e9aa9.json
deleted file mode 100644
index a4f4ad5fdf93c1efa40455c3ee045f73aa9f1346..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/c6d4f510-abc8-4524-99b0-e6d98c6e9aa9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam/1762652579.66787",
- "retrieved_timestamp": "1762652579.667871",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_0.5_1e-7-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2562401095759252
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31904280434381205
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04229607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.334125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15757978723404256
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/b4d7f827-d1cb-46c6-9eea-248867fdc07f.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/b4d7f827-d1cb-46c6-9eea-248867fdc07f.json
deleted file mode 100644
index 67331d71606b257dbd2edd0e814d1c8144b02c6a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/b4d7f827-d1cb-46c6-9eea-248867fdc07f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam/1762652579.6680949",
- "retrieved_timestamp": "1762652579.6680949",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2408276705807258
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31647277641099675
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04305135951661632
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3315208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1556682180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/d1d2f75d-ddd8-42cb-9de8-1f327479eb9b.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/d1d2f75d-ddd8-42cb-9de8-1f327479eb9b.json
deleted file mode 100644
index 618840afd3b2bc6245cb53fc11464b95595d0076..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/d1d2f75d-ddd8-42cb-9de8-1f327479eb9b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam/1762652579.668304",
- "retrieved_timestamp": "1762652579.668305",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24812165055386376
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3204166266783764
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04758308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3301875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15915890957446807
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/9df1e491-fa9d-41c7-ae46-8cc70a47a60f.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/9df1e491-fa9d-41c7-ae46-8cc70a47a60f.json
deleted file mode 100644
index 8c0f6f95c12285b757c8fcb753925f7937facbd0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/9df1e491-fa9d-41c7-ae46-8cc70a47a60f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam/1762652579.668525",
- "retrieved_timestamp": "1762652579.6685262",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_1e-6_2ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2544914161092568
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3185709286639082
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04984894259818731
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32885416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15608377659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/6c070a2b-9f5e-46cd-b8ba-b6220509b85d.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/6c070a2b-9f5e-46cd-b8ba-b6220509b85d.json
deleted file mode 100644
index 275cfeeedcf04168bd2f61e559dd10604042bfa1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/6c070a2b-9f5e-46cd-b8ba-b6220509b85d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam/1762652579.668755",
- "retrieved_timestamp": "1762652579.668756",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2519935992056632
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.320368681472897
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03927492447129909
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32615625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15375664893617022
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/4496da44-d4bd-40a8-8f91-56b2cb2fa766.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/4496da44-d4bd-40a8-8f91-56b2cb2fa766.json
deleted file mode 100644
index 5fb7abe0c8c5ab917376b7b32c5fb7801a15164a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/4496da44-d4bd-40a8-8f91-56b2cb2fa766.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam/1762652579.6689868",
- "retrieved_timestamp": "1762652579.668988",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23146030310686755
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32128474090743103
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.045317220543806644
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32221875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15824468085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/69c6593c-6e84-498f-8d68-62c1809a4606.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/69c6593c-6e84-498f-8d68-62c1809a4606.json
deleted file mode 100644
index eca99f8af4a481673e2529bb618b07090eb3b439..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/69c6593c-6e84-498f-8d68-62c1809a4606.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam/1762652579.669204",
- "retrieved_timestamp": "1762652579.669204",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_2e-6_2ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25149381419079153
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31867127828365593
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04305135951661632
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32888541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15392287234042554
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/b1c0f775-987a-4da5-9451-09bf295b16ba.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/b1c0f775-987a-4da5-9451-09bf295b16ba.json
deleted file mode 100644
index 4c7ae83da408a3ff529ee136ebc36f4bd521310a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/b1c0f775-987a-4da5-9451-09bf295b16ba.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam/1762652579.669419",
- "retrieved_timestamp": "1762652579.66942",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24719743613611883
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213274785812292
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03474320241691843
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3261875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15882646276595744
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/c589d3d6-9d8b-45e3-a6c6-60f25d44349b.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/c589d3d6-9d8b-45e3-a6c6-60f25d44349b.json
deleted file mode 100644
index ec9e8d468a1210c9ecbd772358e47df84536edfa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/c589d3d6-9d8b-45e3-a6c6-60f25d44349b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam/1762652579.6696231",
- "retrieved_timestamp": "1762652579.669624",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-2ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24599839536873275
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32337658694524307
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0377643504531722
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33021875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15334109042553193
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/1e76e5ee-1728-4756-8f13-d68ce1ca3a5e.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/1e76e5ee-1728-4756-8f13-d68ce1ca3a5e.json
deleted file mode 100644
index 74d17a63602389cc0215385d64ca3e83ab28e7b8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/1e76e5ee-1728-4756-8f13-d68ce1ca3a5e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam/1762652579.669835",
- "retrieved_timestamp": "1762652579.669836",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_3e-6-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25236816092412573
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3255638228201855
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05362537764350453
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33679166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15309175531914893
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/a44985f9-2255-421b-93b9-fcb5761e17b8.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/a44985f9-2255-421b-93b9-fcb5761e17b8.json
deleted file mode 100644
index c17027af336e2b6fd26240e475d6f6359f64d1dc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/a44985f9-2255-421b-93b9-fcb5761e17b8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam/1762652579.670048",
- "retrieved_timestamp": "1762652579.670049",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2264646692996804
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3252098558034601
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04758308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32615625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1568317819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/ad59cc80-784d-41bf-9a3e-9d9f286667d2.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/ad59cc80-784d-41bf-9a3e-9d9f286667d2.json
deleted file mode 100644
index 209d0c973a6eba92f613123243cf069bb71cf2e9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/ad59cc80-784d-41bf-9a3e-9d9f286667d2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam/1762652579.6702561",
- "retrieved_timestamp": "1762652579.6702569",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-2ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23016152697066006
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3224479825736107
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04380664652567976
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27684563758389263
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34079166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15001662234042554
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/0b72d3c8-aaff-4eca-854d-07d132e9aa25.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/0b72d3c8-aaff-4eca-854d-07d132e9aa25.json
deleted file mode 100644
index 1a86e6808b9d0f10cf5b71d14bf6e537f5b13333..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/0b72d3c8-aaff-4eca-854d-07d132e9aa25.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam/1762652579.670511",
- "retrieved_timestamp": "1762652579.6705122",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-6-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25236816092412573
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3278027492189594
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04078549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33945833333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15209441489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/021eca20-1a26-4eba-9006-fb005e91696d.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/021eca20-1a26-4eba-9006-fb005e91696d.json
deleted file mode 100644
index 4542ead2cd172bf18055a8a660c880e71826b325..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/021eca20-1a26-4eba-9006-fb005e91696d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam/1762652579.67072",
- "retrieved_timestamp": "1762652579.67072",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2657570801030156
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31752113645211816
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03625377643504532
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3301875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1574966755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/8662faaa-8964-468a-991b-43b2f0449d48.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/8662faaa-8964-468a-991b-43b2f0449d48.json
deleted file mode 100644
index b253d3bcad16d6d5d4395c6b3ab5af65babecab1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/8662faaa-8964-468a-991b-43b2f0449d48.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam/1762652579.6709208",
- "retrieved_timestamp": "1762652579.6709208",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2487211709375568
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3189091360416723
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0377643504531722
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3275208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15949135638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/56cad8c7-566f-46e5-9692-3c11f4408921.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/56cad8c7-566f-46e5-9692-3c11f4408921.json
deleted file mode 100644
index 7e0155335a1af80af42d0db1923455da8de211ec..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/56cad8c7-566f-46e5-9692-3c11f4408921.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam/1762652579.671123",
- "retrieved_timestamp": "1762652579.671123",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_5e-7_2ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2560151509106947
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3158776856286612
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0377643504531722
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27684563758389263
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3275208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15616688829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/f86fb81b-29b8-425f-8129-ea054108a214.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/f86fb81b-29b8-425f-8129-ea054108a214.json
deleted file mode 100644
index 8bd27e8eb43f5a733d92c03d1741956bc727c1f9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/f86fb81b-29b8-425f-8129-ea054108a214.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam/1762652579.671335",
- "retrieved_timestamp": "1762652579.671336",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2498703440205322
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31561997255280577
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04003021148036254
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3301875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15558510638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/3c5ff9bc-b33a-4557-9c76-ccc041de985c.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/3c5ff9bc-b33a-4557-9c76-ccc041de985c.json
deleted file mode 100644
index 8d08a071a1635dba3e9a2d1e6b6434cfe61a9505..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/3c5ff9bc-b33a-4557-9c76-ccc041de985c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam/1762652579.671542",
- "retrieved_timestamp": "1762652579.6715431",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.249595517670891
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31774285416798703
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.045317220543806644
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33148958333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1566655585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/64e0c863-f33c-44d7-b244-e5288e5018fb.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/64e0c863-f33c-44d7-b244-e5288e5018fb.json
deleted file mode 100644
index a1b8934c6ab48f03a3a9ac45b119b0f5b7721dac..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/64e0c863-f33c-44d7-b244-e5288e5018fb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam/1762652579.6717582",
- "retrieved_timestamp": "1762652579.6717582",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep-MDPO_7e-7_2ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25149381419079153
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3172338500122228
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04380664652567976
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3275208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15533577127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep/09f59d70-2948-4eb6-a14e-2550c97b5542.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep/09f59d70-2948-4eb6-a14e-2550c97b5542.json
deleted file mode 100644
index 087019ad6afd865e431b3c5389074757cd4c109c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep/09f59d70-2948-4eb6-a14e-2550c97b5542.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-2ep/1762652579.6576698",
- "retrieved_timestamp": "1762652579.657671",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-2ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2201447714286981
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3217197270809481
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04078549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33669791666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17096077127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-3ep/57d9c59d-8cd8-4253-a076-8b16becc740e.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-3ep/57d9c59d-8cd8-4253-a076-8b16becc740e.json
deleted file mode 100644
index 61d07356b1b5c83f21169aa8372694a2b8aea366..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-3ep/57d9c59d-8cd8-4253-a076-8b16becc740e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-3ep/1762652579.671975",
- "retrieved_timestamp": "1762652579.671975",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-3ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22808813946993975
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3239538094779519
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.045317220543806644
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.330125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17461768617021275
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/5fb209a6-3d82-4017-8e44-3615d7c50218.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/5fb209a6-3d82-4017-8e44-3615d7c50218.json
deleted file mode 100644
index 1b753d0fcc51eef0094cc7f2e8d63f787ee06caf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/5fb209a6-3d82-4017-8e44-3615d7c50218.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam/1762652579.672395",
- "retrieved_timestamp": "1762652579.672396",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25259311958935626
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.323809171214906
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03927492447129909
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3528229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15741356382978725
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/2ccd9994-1d9c-40c4-85d0-c74af7544b6d.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/2ccd9994-1d9c-40c4-85d0-c74af7544b6d.json
deleted file mode 100644
index 8b7bde6034a2246108e5af1888f3f7ddab48c945..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/2ccd9994-1d9c-40c4-85d0-c74af7544b6d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep/1762652579.672603",
- "retrieved_timestamp": "1762652579.6726038",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_1ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24812165055386376
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31748404240871353
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03851963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34752083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15965757978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/1f1f5c3d-4ee4-4ed8-adeb-9e83942a7e32.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/1f1f5c3d-4ee4-4ed8-adeb-9e83942a7e32.json
deleted file mode 100644
index 67e79002ea5cf0622e73c3ba8a418f2d7e33be18..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/1f1f5c3d-4ee4-4ed8-adeb-9e83942a7e32.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep/1762652579.672818",
- "retrieved_timestamp": "1762652579.672818",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_5e-7_3ep_0alp_0lam_2ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25476624245889795
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3199073234678175
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03851963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34348958333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15616688829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/f9c4db8f-b56e-41cd-9c87-ba2d4b36520a.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/f9c4db8f-b56e-41cd-9c87-ba2d4b36520a.json
deleted file mode 100644
index e244178345ee2c30fb0e92ea2304cb4993b3824d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/f9c4db8f-b56e-41cd-9c87-ba2d4b36520a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam/1762652579.673032",
- "retrieved_timestamp": "1762652579.673033",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2423015376977531
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32193163799444524
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.033987915407854986
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35152083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15633311170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/d1ae295e-1364-442c-a3e4-ac2ad9884a78.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/d1ae295e-1364-442c-a3e4-ac2ad9884a78.json
deleted file mode 100644
index f1ccd1501f6caecfac8012aa5b34f91090272c22..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/d1ae295e-1364-442c-a3e4-ac2ad9884a78.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep/1762652579.673239",
- "retrieved_timestamp": "1762652579.67324",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_1ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24927082363683917
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3190945593427599
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03927492447129909
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34752083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15915890957446807
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/86c29317-7d5f-42c2-a156-615d3c4a259d.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/86c29317-7d5f-42c2-a156-615d3c4a259d.json
deleted file mode 100644
index 46a3812ffdfd15b7f8703566d5fce3b576c7ee29..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/86c29317-7d5f-42c2-a156-615d3c4a259d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep/1762652579.673455",
- "retrieved_timestamp": "1762652579.6734562",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep-MDPO_7e-7_3ep_0alp_0lam_2ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24779695651981187
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3218405915852565
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04154078549848943
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35152083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15558510638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep/70a5a5fb-9dd6-4b1c-a7ac-11155d5ef837.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep/70a5a5fb-9dd6-4b1c-a7ac-11155d5ef837.json
deleted file mode 100644
index 421ff332f60d3d7229811c6a3bf57375f284de4b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep/70a5a5fb-9dd6-4b1c-a7ac-11155d5ef837.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5-5ep/1762652579.6721878",
- "retrieved_timestamp": "1762652579.6721878",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5-5ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23478259905938464
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33076056644270485
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05060422960725076
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34088541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16954787234042554
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5/047ed340-ddb8-40ca-b1ee-10f12b182e43.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5/047ed340-ddb8-40ca-b1ee-10f12b182e43.json
deleted file mode 100644
index 0ca0debd8ea1eea5458f909bd90d384c29b45ad4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-2e-5/047ed340-ddb8-40ca-b1ee-10f12b182e43.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-2e-5/1762652579.65739",
- "retrieved_timestamp": "1762652579.657391",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-2e-5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2067558522498083
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3203968601167082
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03700906344410876
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3486666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16780252659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-2ep/94b65c53-7e0c-4506-bd19-82d23709d269.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-2ep/94b65c53-7e0c-4506-bd19-82d23709d269.json
deleted file mode 100644
index c1d18d78ddc56519e4ffc03f014534cc5ac151fc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-2ep/94b65c53-7e0c-4506-bd19-82d23709d269.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-2ep/1762652579.673873",
- "retrieved_timestamp": "1762652579.673873",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-2ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21747186354428472
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179879277889672
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0377643504531722
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33679166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16273271276595744
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-3ep/1c779874-5568-462e-9e6e-0e3fd42d023e.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-3ep/1c779874-5568-462e-9e6e-0e3fd42d023e.json
deleted file mode 100644
index 473340d14764a3a3553e7900bfa5dcf9b2331058..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-3ep/1c779874-5568-462e-9e6e-0e3fd42d023e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-3ep/1762652579.674078",
- "retrieved_timestamp": "1762652579.674078",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-3ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2198699450790569
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32974820176156994
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.030211480362537766
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35933333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1651429521276596
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-5ep/f562a3e4-6afe-4c1d-a597-6265af34f925.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-5ep/f562a3e4-6afe-4c1d-a597-6265af34f925.json
deleted file mode 100644
index 80a496166e2b6a629e39e5dccba9e15d0cf080af..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-5ep/f562a3e4-6afe-4c1d-a597-6265af34f925.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5-5ep/1762652579.674291",
- "retrieved_timestamp": "1762652579.6742918",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5-5ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2077299343519639
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3275980298873716
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.027190332326283987
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3766354166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15866023936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5/cdbbfad9-85e8-4c8b-b70c-708c08a62798.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5/cdbbfad9-85e8-4c8b-b70c-708c08a62798.json
deleted file mode 100644
index 7141974c3296b039f4fb045ba89d0abd258352be..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-5e-5/cdbbfad9-85e8-4c8b-b70c-708c08a62798.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-5e-5/1762652579.673672",
- "retrieved_timestamp": "1762652579.673672",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-5e-5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2009856070781083
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31093810553451656
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.033987915407854986
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33809375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16722074468085107
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-2ep/9cf15d33-3624-4161-bdad-069b09ab2290.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-2ep/9cf15d33-3624-4161-bdad-069b09ab2290.json
deleted file mode 100644
index ce79c904c10d82f55d54181eb70d9a365c4bef71..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-2ep/9cf15d33-3624-4161-bdad-069b09ab2290.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-2ep/1762652579.674706",
- "retrieved_timestamp": "1762652579.674707",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-2ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2156234347087949
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3100411318318588
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03927492447129909
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2424496644295302
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3367291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15674867021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-3ep/658df4b3-084f-479f-b507-3a4247683651.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-3ep/658df4b3-084f-479f-b507-3a4247683651.json
deleted file mode 100644
index ce4501728737b4137dcf22195bb991a7890b4f49..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-3ep/658df4b3-084f-479f-b507-3a4247683651.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-3ep/1762652579.674919",
- "retrieved_timestamp": "1762652579.674919",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-3ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23805502732749106
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3199313632207049
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03323262839879154
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23657718120805368
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3553645833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15217752659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-5ep/4e72cc33-538b-4fa7-8038-89794fed6511.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-5ep/4e72cc33-538b-4fa7-8038-89794fed6511.json
deleted file mode 100644
index 56616abcfd4276ceb93376243e7bd7258d369cd6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-5ep/4e72cc33-538b-4fa7-8038-89794fed6511.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5-5ep/1762652579.6751308",
- "retrieved_timestamp": "1762652579.6751318",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5-5ep"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21197644472222593
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32002953673668666
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02190332326283988
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24580536912751677
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37127083333333327
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1628158244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5/891bb442-c054-4941-9bd1-8352139f143e.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5/891bb442-c054-4941-9bd1-8352139f143e.json
deleted file mode 100644
index f9a64b5bbd934af8243e958536c9a9b4ea243200..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-7e-5/891bb442-c054-4941-9bd1-8352139f143e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-7e-5/1762652579.6744971",
- "retrieved_timestamp": "1762652579.674498",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-7e-5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20925366915340185
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3158179005969299
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.030211480362537766
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25671140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33669791666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1622340425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-DPO-1epoch_v1/ac94a989-668a-49e6-9975-9169d7394574.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-DPO-1epoch_v1/ac94a989-668a-49e6-9975-9169d7394574.json
deleted file mode 100644
index 2766fefd6e4f73ed2c45dcd45024621eb6e202d7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-DPO-1epoch_v1/ac94a989-668a-49e6-9975-9169d7394574.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-DPO-1epoch_v1/1762652579.67534",
- "retrieved_timestamp": "1762652579.6753411",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-DPO-1epoch_v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20245947419513555
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.326814314271471
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03625377643504532
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3209166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13297872340425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/6961b682-04e5-45af-bd2b-8ad6546503e7.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/6961b682-04e5-45af-bd2b-8ad6546503e7.json
deleted file mode 100644
index 83916ef7b221ee639d2828076428d39668565bbe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/6961b682-04e5-45af-bd2b-8ad6546503e7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT-MDPO-1epoch_v1/1762652579.675586",
- "retrieved_timestamp": "1762652579.6755872",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT-MDPO-1epoch_v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1964144026737944
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32925816453885065
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04682779456193353
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32615625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13372672872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT/eb0f4662-54f5-48ca-b871-726e34bbf540.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT/eb0f4662-54f5-48ca-b871-726e34bbf540.json
deleted file mode 100644
index 0045cf2a6ff456d823907cda7653dd7f6ad6ccf0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen2.5-0.5B-SFT/eb0f4662-54f5-48ca-b871-726e34bbf540.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen2.5-0.5B-SFT/1762652579.654298",
- "retrieved_timestamp": "1762652579.6542988",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen2.5-0.5B-SFT",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen2.5-0.5B-SFT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19636453498938372
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31207478976310743
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.027190332326283987
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3394270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16730385638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/e4e00595-e1ed-42c9-a518-ff104253cad9.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/e4e00595-e1ed-42c9-a518-ff104253cad9.json
deleted file mode 100644
index 5ee0d0204f54d3c25c26b7ac85d1ffafda57d049..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/e4e00595-e1ed-42c9-a518-ff104253cad9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam/1762652579.675801",
- "retrieved_timestamp": "1762652579.675801",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPOP_1e-6-3ep_0alp_5lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25324250765746
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3140431891367934
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04909365558912387
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33145833333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15658244680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/3a7a5a89-0ab8-47cd-95c6-14a6186e05b9.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/3a7a5a89-0ab8-47cd-95c6-14a6186e05b9.json
deleted file mode 100644
index d31dd6c2a7ff251572d6bf96edd9571b3ef3b7dc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/3a7a5a89-0ab8-47cd-95c6-14a6186e05b9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam/1762652579.676018",
- "retrieved_timestamp": "1762652579.676018",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPOP_1e-7-3ep_0alp_5lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26695612087040166
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3188575312560274
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04078549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32879166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15625
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/f78ac837-d5f4-48f1-8a9e-1549b0020160.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/f78ac837-d5f4-48f1-8a9e-1549b0020160.json
deleted file mode 100644
index 94ca4f4eb55036372870b7a5613f55fb60c0b3e0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/f78ac837-d5f4-48f1-8a9e-1549b0020160.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam/1762652579.6762261",
- "retrieved_timestamp": "1762652579.6762261",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-1ep_0alp_5lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24807178286945303
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32608064671010917
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04380664652567976
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3368229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15649933510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/2ae9cee5-8f3c-4303-802f-481a03edaf9f.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/2ae9cee5-8f3c-4303-802f-481a03edaf9f.json
deleted file mode 100644
index 4a11f8df732c471249a36686ce03821ea507ded3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/2ae9cee5-8f3c-4303-802f-481a03edaf9f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam/1762652579.67643",
- "retrieved_timestamp": "1762652579.6764312",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-2ep_0alp_5lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23832985367713222
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32184656431310543
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04305135951661632
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3341875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15034906914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/654b55d0-940c-43bd-9478-0bd67bb7b0d8.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/654b55d0-940c-43bd-9478-0bd67bb7b0d8.json
deleted file mode 100644
index bd0090610ddb10790dc7a9038cc380b77192160f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/654b55d0-940c-43bd-9478-0bd67bb7b0d8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam/1762652579.676642",
- "retrieved_timestamp": "1762652579.6766431",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPOP_3e-6-3ep_0alp_5lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24714756845170813
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32244323308961736
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04003021148036254
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33276041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15334109042553193
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/c23f1072-c7be-4eab-b866-16c6429071e4.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/c23f1072-c7be-4eab-b866-16c6429071e4.json
deleted file mode 100644
index af75d0a66d134ef02f6eae2acaa0822b7a202af3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/c23f1072-c7be-4eab-b866-16c6429071e4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam/1762652579.6768441",
- "retrieved_timestamp": "1762652579.676845",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-1ep_0alp_5lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24474948691693596
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3181429193838813
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04380664652567976
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.334125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15649933510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/c02ad005-8e12-46d9-8bb3-090f62c6a946.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/c02ad005-8e12-46d9-8bb3-090f62c6a946.json
deleted file mode 100644
index 927e5c50287f296a0f9bf2fd86734979841c6b3f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/c02ad005-8e12-46d9-8bb3-090f62c6a946.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam/1762652579.677048",
- "retrieved_timestamp": "1762652579.6770492",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-2ep_0alp_5lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2551408041773605
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3194064593640778
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0445619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32615625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1566655585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/e1d1dd0d-ef8e-44e1-aca1-f10c53f5aa84.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/e1d1dd0d-ef8e-44e1-aca1-f10c53f5aa84.json
deleted file mode 100644
index cf74787a4fd3cbaf7c35bc4ec3dfa4f1ac200928..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/e1d1dd0d-ef8e-44e1-aca1-f10c53f5aa84.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam/1762652579.677404",
- "retrieved_timestamp": "1762652579.677407",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPOP_3e-7-3ep_0alp_5lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25379216035674235
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31530652457997205
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04154078549848943
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.326125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1583277925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/02c4e0de-4a4e-44b7-bc4c-44c92ade94ec.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/02c4e0de-4a4e-44b7-bc4c-44c92ade94ec.json
deleted file mode 100644
index 87b63916c5d0ad7f145aa37164708dd044b8b06c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/02c4e0de-4a4e-44b7-bc4c-44c92ade94ec.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam/1762652579.677789",
- "retrieved_timestamp": "1762652579.67779",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-1ep_0alp_5lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24022815019703275
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3168335157841944
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0377643504531722
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33279166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1568317819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/4e38a2db-c67e-4f2a-84a0-f9afa7d32bd5.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/4e38a2db-c67e-4f2a-84a0-f9afa7d32bd5.json
deleted file mode 100644
index 86e0177a854f9160a87316d52a74723f5382be7c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/4e38a2db-c67e-4f2a-84a0-f9afa7d32bd5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam/1762652579.678058",
- "retrieved_timestamp": "1762652579.67806",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-2ep_0alp_5lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24839647690350491
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3210570160312575
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04380664652567976
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3288229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1573304521276596
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/77255cfb-3e18-4a3b-98a8-b0072aacb669.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/77255cfb-3e18-4a3b-98a8-b0072aacb669.json
deleted file mode 100644
index 61be6349d0c260cb05054017964c9651e3afef7c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/77255cfb-3e18-4a3b-98a8-b0072aacb669.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam/1762652579.6783109",
- "retrieved_timestamp": "1762652579.6783118",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPOP_5e-7-3ep_0alp_5lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25781371206177384
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32030958605054793
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04229607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32885416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1583277925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/be9afede-e624-43e6-99dd-52e0d2b413ac.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/be9afede-e624-43e6-99dd-52e0d2b413ac.json
deleted file mode 100644
index 14ecc89038b5adb271e6658adca78a9206af235d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/be9afede-e624-43e6-99dd-52e0d2b413ac.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_1e-6-3ep_0alp_0lam/1762652579.678605",
- "retrieved_timestamp": "1762652579.678606",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPO_1e-6-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23163539408768735
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3258499805340021
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.052870090634441085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.322125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15799534574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/9632892a-a6b2-4f17-827e-bfef9a712985.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/9632892a-a6b2-4f17-827e-bfef9a712985.json
deleted file mode 100644
index b47e93966299c78862c037e9819b7b14af9a418b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/9632892a-a6b2-4f17-827e-bfef9a712985.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_1e-7-3ep_0alp_0lam/1762652579.678855",
- "retrieved_timestamp": "1762652579.678856",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPO_1e-7-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23598163982677073
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3225125170893353
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04380664652567976
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32221875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1595744680851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/a690910a-388f-4a51-98a2-fc1e1bb327e2.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/a690910a-388f-4a51-98a2-fc1e1bb327e2.json
deleted file mode 100644
index 19e07b6e25a6e17c4f0f746e85af46f58010b932..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/a690910a-388f-4a51-98a2-fc1e1bb327e2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-6-1ep_0alp_0lam/1762652579.679086",
- "retrieved_timestamp": "1762652579.679086",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPO_3e-6-1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23370878158840763
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3132229900705577
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03474320241691843
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3235208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15325797872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/8c8eafcc-bb0f-4483-93ff-1379158a5d10.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/8c8eafcc-bb0f-4483-93ff-1379158a5d10.json
deleted file mode 100644
index 7704fcced05333ef0a0289247cad149e8465b9f9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/8c8eafcc-bb0f-4483-93ff-1379158a5d10.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-6-2ep_0alp_0lam/1762652579.6792939",
- "retrieved_timestamp": "1762652579.679295",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPO_3e-6-2ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25693936532843964
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32760017293049276
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.054380664652567974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3155833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15649933510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/6c009b93-145d-4630-bda1-fb24bf764e7a.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/6c009b93-145d-4630-bda1-fb24bf764e7a.json
deleted file mode 100644
index 74fedd3838547e8515408d6a61b70a1e825e41e7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/6c009b93-145d-4630-bda1-fb24bf764e7a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-6-3ep_0alp_0lam/1762652579.679507",
- "retrieved_timestamp": "1762652579.679507",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPO_3e-6-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24599839536873275
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32674094707635526
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04305135951661632
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3209166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15433843085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/1b4ccc58-920c-4089-b8ca-af3c71c5c3be.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/1b4ccc58-920c-4089-b8ca-af3c71c5c3be.json
deleted file mode 100644
index 7f7e027ac7e5f54e937fb28a30e57c1c2a172ecc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/1b4ccc58-920c-4089-b8ca-af3c71c5c3be.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-7-1ep_0alp_0lam/1762652579.679712",
- "retrieved_timestamp": "1762652579.679712",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPO_3e-7-1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2529178136234081
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32292563083414066
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3195208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15965757978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/4d278257-d64b-4da7-bcd6-0d3fbee80dd8.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/4d278257-d64b-4da7-bcd6-0d3fbee80dd8.json
deleted file mode 100644
index 3effa9e78cfbe4d85b8287b4d7ccfaf5b4071352..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/4d278257-d64b-4da7-bcd6-0d3fbee80dd8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-7-2ep_0alp_0lam/1762652579.6799219",
- "retrieved_timestamp": "1762652579.679923",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPO_3e-7-2ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25046986440422525
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3255735108237258
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04758308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3194895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15990691489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/3650d718-e20a-4310-a248-3897f7713e93.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/3650d718-e20a-4310-a248-3897f7713e93.json
deleted file mode 100644
index a0f5909829a0024d0b8e03224767050595163086..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/3650d718-e20a-4310-a248-3897f7713e93.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_3e-7-3ep_0alp_0lam/1762652579.680135",
- "retrieved_timestamp": "1762652579.680136",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPO_3e-7-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2387044153955948
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3258394284267221
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0445619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31685416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1589095744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/6e224cd8-7f12-42a0-968e-311450d24e58.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/6e224cd8-7f12-42a0-968e-311450d24e58.json
deleted file mode 100644
index 0473c073418dbb695cc4d3447b17c1be837abe83..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/6e224cd8-7f12-42a0-968e-311450d24e58.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_5e-7-1ep_0alp_0lam/1762652579.6803432",
- "retrieved_timestamp": "1762652579.6803432",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPO_5e-7-1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25324250765746
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32182747858122923
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0634441087613293
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32085416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15932513297872342
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/1f17dbf3-f498-41cb-8ec0-5dabb2d9655e.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/1f17dbf3-f498-41cb-8ec0-5dabb2d9655e.json
deleted file mode 100644
index b0aecbaa318d94cbadb926fff945b61334bfd169..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/1f17dbf3-f498-41cb-8ec0-5dabb2d9655e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_5e-7-2ep_0alp_0lam/1762652579.680558",
- "retrieved_timestamp": "1762652579.6805592",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPO_5e-7-2ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24562383365027018
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3299192088381941
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05362537764350453
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.318125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16015625
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/c5829ba8-e45c-4242-b308-9455f832cb58.json b/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/c5829ba8-e45c-4242-b308-9455f832cb58.json
deleted file mode 100644
index 4e154fe941783755f08853b0f07624f36c282ea2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JayHyeon/JayHyeon_Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/c5829ba8-e45c-4242-b308-9455f832cb58.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-DPO_5e-7-3ep_0alp_0lam/1762652579.680775",
- "retrieved_timestamp": "1762652579.680775",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam",
- "developer": "JayHyeon",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-DPO_5e-7-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24225167001334236
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32712145602920534
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.318125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15949135638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_Llama-3-Instruct-8B-SimPO-v0.2/4d7428e8-41a2-4834-900e-e43b05f4d131.json b/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_Llama-3-Instruct-8B-SimPO-v0.2/4d7428e8-41a2-4834-900e-e43b05f4d131.json
deleted file mode 100644
index 4bca5680cb4658a1be2c9e84839b2d885b66cd6f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_Llama-3-Instruct-8B-SimPO-v0.2/4d7428e8-41a2-4834-900e-e43b05f4d131.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Jimmy19991222_Llama-3-Instruct-8B-SimPO-v0.2/1762652579.692669",
- "retrieved_timestamp": "1762652579.692669",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2",
- "developer": "Jimmy19991222",
- "inference_platform": "unknown",
- "id": "Jimmy19991222/Llama-3-Instruct-8B-SimPO-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6540368444615842
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.498371102582105
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.061933534743202415
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145973154362416
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40125000000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3686003989361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/9e8f395c-f481-4a64-86ee-053961b17c42.json b/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/9e8f395c-f481-4a64-86ee-053961b17c42.json
deleted file mode 100644
index 23519410c8201d2a8fdd2e0dba8bf702b41a52e5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/9e8f395c-f481-4a64-86ee-053961b17c42.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun/1762652579.6929338",
- "retrieved_timestamp": "1762652579.692935",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun",
- "developer": "Jimmy19991222",
- "inference_platform": "unknown",
- "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert-f1-beta10-gamma0.3-lr1.0e-6-1minus-rerun"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6717221416951467
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48797965672899357
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4040729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36336436170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/913d1072-8ea3-4e0d-9d72-d30ae186dc7d.json b/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/913d1072-8ea3-4e0d-9d72-d30ae186dc7d.json
deleted file mode 100644
index dcdbbe169b50f91f27f6d5089b004d904e6a040b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/913d1072-8ea3-4e0d-9d72-d30ae186dc7d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log/1762652579.6931531",
- "retrieved_timestamp": "1762652579.693154",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log",
- "developer": "Jimmy19991222",
- "inference_platform": "unknown",
- "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_f1-beta10-gamma0.3-lr1.0e-6-scale-log"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6555605792630221
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49345840367294164
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.054380664652567974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4000104166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3657746010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/55baee54-fb05-49a1-962d-145a93de91a8.json b/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/55baee54-fb05-49a1-962d-145a93de91a8.json
deleted file mode 100644
index d080d381e70dd3ad5e78f05b9ec1910de6e282af..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/55baee54-fb05-49a1-962d-145a93de91a8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log/1762652579.693368",
- "retrieved_timestamp": "1762652579.6933692",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log",
- "developer": "Jimmy19991222",
- "inference_platform": "unknown",
- "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bert_p-beta10-gamma0.3-lr1.0e-6-scale-log"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6315055164740666
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4916414793938901
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0649546827794562
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2860738255033557
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3935
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3611203457446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/601e250a-5c2f-4947-9ea3-0f903b2823ec.json b/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/601e250a-5c2f-4947-9ea3-0f903b2823ec.json
deleted file mode 100644
index 8544a40d8edd859e4740904fd232a9fc2cda4a9f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/601e250a-5c2f-4947-9ea3-0f903b2823ec.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4/1762652579.69359",
- "retrieved_timestamp": "1762652579.693591",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4",
- "developer": "Jimmy19991222",
- "inference_platform": "unknown",
- "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-bleu-beta0.1-no-length-scale-gamma0.4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6284580468711907
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4986088445592742
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40137500000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3544714095744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/8ab1619c-6edf-457e-9834-0e9dc127d6a4.json b/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/8ab1619c-6edf-457e-9834-0e9dc127d6a4.json
deleted file mode 100644
index c35539c8d11db255003ea785b6483f6be438fae1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/8ab1619c-6edf-457e-9834-0e9dc127d6a4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun/1762652579.69381",
- "retrieved_timestamp": "1762652579.693811",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun",
- "developer": "Jimmy19991222",
- "inference_platform": "unknown",
- "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-1minus-gamma0.3-rerun"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6677504576745258
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4940463886115545
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06117824773413897
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3987083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3657746010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/5f6d2c1e-1c66-4b1c-beed-a730d93d997f.json b/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/5f6d2c1e-1c66-4b1c-beed-a730d93d997f.json
deleted file mode 100644
index 3d34b669b48b24808c9806e505be49366ee7e29f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/5f6d2c1e-1c66-4b1c-beed-a730d93d997f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log/1762652579.69404",
- "retrieved_timestamp": "1762652579.694041",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log",
- "developer": "Jimmy19991222",
- "inference_platform": "unknown",
- "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rouge2-beta10-gamma0.3-lr1.0e-6-scale-log"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6605063453857986
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49160075581298046
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06570996978851963
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4000416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3664394946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/6621f47a-13c7-421c-b054-cc9116a04e4e.json b/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/6621f47a-13c7-421c-b054-cc9116a04e4e.json
deleted file mode 100644
index 746499d28b799cd5a430c95728ab173c4f00ca05..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Jimmy19991222/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/6621f47a-13c7-421c-b054-cc9116a04e4e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Jimmy19991222_llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log/1762652579.694266",
- "retrieved_timestamp": "1762652579.6942668",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log",
- "developer": "Jimmy19991222",
- "inference_platform": "unknown",
- "id": "Jimmy19991222/llama-3-8b-instruct-gapo-v2-rougeL-beta10-gamma0.3-lr1.0e-6-scale-log"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.649190813707629
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4952489348573605
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06419939577039276
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3961354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37109375
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/JungZoona/JungZoona_T3Q-Qwen2.5-14B-Instruct-1M-e3/464673ee-0238-40b4-9c15-1a1551b9f65c.json b/leaderboard_data/HFOpenLLMv2/JungZoona/JungZoona_T3Q-Qwen2.5-14B-Instruct-1M-e3/464673ee-0238-40b4-9c15-1a1551b9f65c.json
deleted file mode 100644
index c9d621281dae5e8cb071a2bc24904aa7b05fb477..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/JungZoona/JungZoona_T3Q-Qwen2.5-14B-Instruct-1M-e3/464673ee-0238-40b4-9c15-1a1551b9f65c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JungZoona_T3Q-Qwen2.5-14B-Instruct-1M-e3/1762652579.696794",
- "retrieved_timestamp": "1762652579.696794",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3",
- "developer": "JungZoona",
- "inference_platform": "unknown",
- "id": "JungZoona/T3Q-Qwen2.5-14B-Instruct-1M-e3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.732396707403024
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7585971930826706
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2862537764350453
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41694630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5911041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5884308510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Unknown",
- "params_billions": 0.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3-70b-SVA-FT-1415/08fcda98-72e9-4338-b2a2-6db924a47288.json b/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3-70b-SVA-FT-1415/08fcda98-72e9-4338-b2a2-6db924a47288.json
deleted file mode 100644
index 933b956d883344b68b544c47850b39f5278c740c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3-70b-SVA-FT-1415/08fcda98-72e9-4338-b2a2-6db924a47288.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3-70b-SVA-FT-1415/1762652579.6977122",
- "retrieved_timestamp": "1762652579.697713",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "KSU-HW-SEC/Llama3-70b-SVA-FT-1415",
- "developer": "KSU-HW-SEC",
- "inference_platform": "unknown",
- "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-1415"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6179913739987677
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6650146340680478
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21978851963746224
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.375
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4565416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5242686170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3-70b-SVA-FT-500/4282c191-344e-4326-a80e-49b712687e7c.json b/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3-70b-SVA-FT-500/4282c191-344e-4326-a80e-49b712687e7c.json
deleted file mode 100644
index dffec2dacba3e0e30ab7abcb1c0a968e2eaf5aab..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3-70b-SVA-FT-500/4282c191-344e-4326-a80e-49b712687e7c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3-70b-SVA-FT-500/1762652579.6980212",
- "retrieved_timestamp": "1762652579.698022",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "KSU-HW-SEC/Llama3-70b-SVA-FT-500",
- "developer": "KSU-HW-SEC",
- "inference_platform": "unknown",
- "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-500"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6105223030448099
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6692236023098005
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21374622356495468
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45114583333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.522689494680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3-70b-SVA-FT-final/58fe6545-2f0c-44de-a29b-2da839b141a4.json b/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3-70b-SVA-FT-final/58fe6545-2f0c-44de-a29b-2da839b141a4.json
deleted file mode 100644
index 10279916785def01a81f680b5bb1d62de56fdec0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3-70b-SVA-FT-final/58fe6545-2f0c-44de-a29b-2da839b141a4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3-70b-SVA-FT-final/1762652579.698244",
- "retrieved_timestamp": "1762652579.698245",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "KSU-HW-SEC/Llama3-70b-SVA-FT-final",
- "developer": "KSU-HW-SEC",
- "inference_platform": "unknown",
- "id": "KSU-HW-SEC/Llama3-70b-SVA-FT-final"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6164676391973297
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6650146340680478
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21978851963746224
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.375
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4565416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5242686170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3.1-70b-SVA-FT-1000step/fe896cef-7667-482d-b7f1-5361fc66ccce.json b/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3.1-70b-SVA-FT-1000step/fe896cef-7667-482d-b7f1-5361fc66ccce.json
deleted file mode 100644
index 5d0fd4ebf2f000b88cf7a863ed9ccb5b7326b116..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/KSU-HW-SEC/KSU-HW-SEC_Llama3.1-70b-SVA-FT-1000step/fe896cef-7667-482d-b7f1-5361fc66ccce.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/KSU-HW-SEC_Llama3.1-70b-SVA-FT-1000step/1762652579.698519",
- "retrieved_timestamp": "1762652579.69852",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step",
- "developer": "KSU-HW-SEC",
- "inference_platform": "unknown",
- "id": "KSU-HW-SEC/Llama3.1-70b-SVA-FT-1000step"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7238039512936785
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6903120365165111
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32099697885196377
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3959731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45917708333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5251828457446809
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Khetterman/Khetterman_DarkAtom-12B-v3/64802b86-879e-4072-b5ad-aab17d7251f0.json b/leaderboard_data/HFOpenLLMv2/Khetterman/Khetterman_DarkAtom-12B-v3/64802b86-879e-4072-b5ad-aab17d7251f0.json
deleted file mode 100644
index c8f733aa01e6b17f6b3544bcdc5841cf75c1983d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Khetterman/Khetterman_DarkAtom-12B-v3/64802b86-879e-4072-b5ad-aab17d7251f0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Khetterman_DarkAtom-12B-v3/1762652579.6987362",
- "retrieved_timestamp": "1762652579.698737",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Khetterman/DarkAtom-12B-v3",
- "developer": "Khetterman",
- "inference_platform": "unknown",
- "id": "Khetterman/DarkAtom-12B-v3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6173419859306639
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5153709655381875
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11102719033232629
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4468020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3546376329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Khetterman/Khetterman_Kosmos-8B-v1/936cbaa1-e55b-46b8-9610-a5a8faaf4434.json b/leaderboard_data/HFOpenLLMv2/Khetterman/Khetterman_Kosmos-8B-v1/936cbaa1-e55b-46b8-9610-a5a8faaf4434.json
deleted file mode 100644
index dd0ae491329289d3f45250b9bc1b8634971d14db..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Khetterman/Khetterman_Kosmos-8B-v1/936cbaa1-e55b-46b8-9610-a5a8faaf4434.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Khetterman_Kosmos-8B-v1/1762652579.6990001",
- "retrieved_timestamp": "1762652579.699001",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Khetterman/Kosmos-8B-v1",
- "developer": "Khetterman",
- "inference_platform": "unknown",
- "id": "Khetterman/Kosmos-8B-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41291107594515886
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5233522858623628
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09894259818731117
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3918854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.366938164893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/KingNish/KingNish_Reasoning-0.5b/98f5e59e-0bdb-405b-a18e-3addd8920951.json b/leaderboard_data/HFOpenLLMv2/KingNish/KingNish_Reasoning-0.5b/98f5e59e-0bdb-405b-a18e-3addd8920951.json
deleted file mode 100644
index 42d569370844f0cff7e8771ceacd08fc132ea334..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/KingNish/KingNish_Reasoning-0.5b/98f5e59e-0bdb-405b-a18e-3addd8920951.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/KingNish_Reasoning-0.5b/1762652579.6997252",
- "retrieved_timestamp": "1762652579.699726",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "KingNish/Reasoning-0.5b",
- "developer": "KingNish",
- "inference_platform": "unknown",
- "id": "KingNish/Reasoning-0.5b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.217421995859874
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33536255853174524
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02190332326283988
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35133333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16414561170212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Kquant03/Kquant03_CognitiveFusion2-4x7B-BF16/66f84aee-5d79-4fec-9fff-799ac874d165.json b/leaderboard_data/HFOpenLLMv2/Kquant03/Kquant03_CognitiveFusion2-4x7B-BF16/66f84aee-5d79-4fec-9fff-799ac874d165.json
deleted file mode 100644
index 7f3bee28111ee33ba19e82ddb9642ff04367adc1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Kquant03/Kquant03_CognitiveFusion2-4x7B-BF16/66f84aee-5d79-4fec-9fff-799ac874d165.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Kquant03_CognitiveFusion2-4x7B-BF16/1762652579.701032",
- "retrieved_timestamp": "1762652579.7010329",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Kquant03/CognitiveFusion2-4x7B-BF16",
- "developer": "Kquant03",
- "inference_platform": "unknown",
- "id": "Kquant03/CognitiveFusion2-4x7B-BF16"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35665700341759865
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41078286111483786
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05740181268882175
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2860738255033557
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4145520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27925531914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 24.154
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Kquant03/Kquant03_L3-Pneuma-8B/5420d88b-bc26-4d04-9812-ffce8a3564e6.json b/leaderboard_data/HFOpenLLMv2/Kquant03/Kquant03_L3-Pneuma-8B/5420d88b-bc26-4d04-9812-ffce8a3564e6.json
deleted file mode 100644
index 3fbd6031725f1b315619ab6a102a807b87f976ee..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Kquant03/Kquant03_L3-Pneuma-8B/5420d88b-bc26-4d04-9812-ffce8a3564e6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Kquant03_L3-Pneuma-8B/1762652579.701272",
- "retrieved_timestamp": "1762652579.7012732",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Kquant03/L3-Pneuma-8B",
- "developer": "Kquant03",
- "inference_platform": "unknown",
- "id": "Kquant03/L3-Pneuma-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2374056392593873
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49550433176754827
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05060422960725076
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41715624999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31840093085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Krystalan/Krystalan_DRT-o1-14B/dbd87f5e-e5ba-447b-8416-b6413c3dab09.json b/leaderboard_data/HFOpenLLMv2/Krystalan/Krystalan_DRT-o1-14B/dbd87f5e-e5ba-447b-8416-b6413c3dab09.json
deleted file mode 100644
index 8523783ec285c203cf7789fe276dccefae7a5ef6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Krystalan/Krystalan_DRT-o1-14B/dbd87f5e-e5ba-447b-8416-b6413c3dab09.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Krystalan_DRT-o1-14B/1762652579.70148",
- "retrieved_timestamp": "1762652579.7014809",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Krystalan/DRT-o1-14B",
- "developer": "Krystalan",
- "inference_platform": "unknown",
- "id": "Krystalan/DRT-o1-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4067662690549963
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.637927537514229
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4826283987915408
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3523489932885906
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47951041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5178690159574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Krystalan/Krystalan_DRT-o1-7B/acb8e4cc-41b2-47ef-b819-d480189c618c.json b/leaderboard_data/HFOpenLLMv2/Krystalan/Krystalan_DRT-o1-7B/acb8e4cc-41b2-47ef-b819-d480189c618c.json
deleted file mode 100644
index 2ed883920ebacda7cb2243ce983357e887dab62e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Krystalan/Krystalan_DRT-o1-7B/acb8e4cc-41b2-47ef-b819-d480189c618c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Krystalan_DRT-o1-7B/1762652579.701715",
- "retrieved_timestamp": "1762652579.701716",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Krystalan/DRT-o1-7B",
- "developer": "Krystalan",
- "inference_platform": "unknown",
- "id": "Krystalan/DRT-o1-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3928276971768242
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5467693339610741
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4478851963746224
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.50865625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41514295212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralExperiment-7b-MagicCoder-v7.5/4775e169-e3a7-41b6-bf1e-a7e8e0edb4fc.json b/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralExperiment-7b-MagicCoder-v7.5/4775e169-e3a7-41b6-bf1e-a7e8e0edb4fc.json
deleted file mode 100644
index f27f4601c7d69bf09c2be849baa0f51d17565955..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralExperiment-7b-MagicCoder-v7.5/4775e169-e3a7-41b6-bf1e-a7e8e0edb4fc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralExperiment-7b-MagicCoder-v7.5/1762652579.701928",
- "retrieved_timestamp": "1762652579.7019289",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5",
- "developer": "Kukedlc",
- "inference_platform": "unknown",
- "id": "Kukedlc/NeuralExperiment-7b-MagicCoder-v7.5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4552509563513699
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3988446544778517
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4281979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2824135638297872
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralSynthesis-7B-v0.1/3d2603e3-d556-48e8-ba94-555faf9f1807.json b/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralSynthesis-7B-v0.1/3d2603e3-d556-48e8-ba94-555faf9f1807.json
deleted file mode 100644
index eec2d7492eb0ef8d6a1a5f20424056a824a029e8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralSynthesis-7B-v0.1/3d2603e3-d556-48e8-ba94-555faf9f1807.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralSynthesis-7B-v0.1/1762652579.7026482",
- "retrieved_timestamp": "1762652579.702649",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Kukedlc/NeuralSynthesis-7B-v0.1",
- "developer": "Kukedlc",
- "inference_platform": "unknown",
- "id": "Kukedlc/NeuralSynthesis-7B-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4184563624516283
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5144745481048844
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0634441087613293
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43328125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.304936835106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralSynthesis-7B-v0.3/b3412f38-d0bc-47c9-a750-14bdbf4e65d8.json b/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralSynthesis-7B-v0.3/b3412f38-d0bc-47c9-a750-14bdbf4e65d8.json
deleted file mode 100644
index 8ee26453d0f778fd7f6622ba81a502295a0582a6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralSynthesis-7B-v0.3/b3412f38-d0bc-47c9-a750-14bdbf4e65d8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralSynthesis-7B-v0.3/1762652579.702864",
- "retrieved_timestamp": "1762652579.702865",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Kukedlc/NeuralSynthesis-7B-v0.3",
- "developer": "Kukedlc",
- "inference_platform": "unknown",
- "id": "Kukedlc/NeuralSynthesis-7B-v0.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4078400865259733
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5138078814382175
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07779456193353475
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4345833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30501994680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralSynthesis-7b-v0.4-slerp/4e30bf00-f6b7-4c28-8cf8-dc64427fb958.json b/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralSynthesis-7b-v0.4-slerp/4e30bf00-f6b7-4c28-8cf8-dc64427fb958.json
deleted file mode 100644
index 2a9b9b75891d865dcf69740116e383637f29dfe6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Kukedlc/Kukedlc_NeuralSynthesis-7b-v0.4-slerp/4e30bf00-f6b7-4c28-8cf8-dc64427fb958.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Kukedlc_NeuralSynthesis-7b-v0.4-slerp/1762652579.7030761",
- "retrieved_timestamp": "1762652579.703077",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Kukedlc/NeuralSynthesis-7b-v0.4-slerp",
- "developer": "Kukedlc",
- "inference_platform": "unknown",
- "id": "Kukedlc/NeuralSynthesis-7b-v0.4-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3947259936967247
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5142932549151301
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06268882175226587
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43324999999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3042719414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Kumar955/Kumar955_Hemanth-llm/0787e240-a1f4-444a-b3dd-7ef1a1d394b4.json b/leaderboard_data/HFOpenLLMv2/Kumar955/Kumar955_Hemanth-llm/0787e240-a1f4-444a-b3dd-7ef1a1d394b4.json
deleted file mode 100644
index 91295717789cb7038d35483a92b5865ea806a59a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Kumar955/Kumar955_Hemanth-llm/0787e240-a1f4-444a-b3dd-7ef1a1d394b4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Kumar955_Hemanth-llm/1762652579.703545",
- "retrieved_timestamp": "1762652579.703546",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Kumar955/Hemanth-llm",
- "developer": "Kumar955",
- "inference_platform": "unknown",
- "id": "Kumar955/Hemanth-llm"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5045102550122564
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.522494907014536
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0702416918429003
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4485625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3112533244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/L-RAGE/L-RAGE_3_PRYMMAL-ECE-7B-SLERP-V1/02fee4d1-8899-4a93-b6f1-a1a8d251cedd.json b/leaderboard_data/HFOpenLLMv2/L-RAGE/L-RAGE_3_PRYMMAL-ECE-7B-SLERP-V1/02fee4d1-8899-4a93-b6f1-a1a8d251cedd.json
deleted file mode 100644
index cbc899f7e5100cb9090e4cf371cc4ea068bb012c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/L-RAGE/L-RAGE_3_PRYMMAL-ECE-7B-SLERP-V1/02fee4d1-8899-4a93-b6f1-a1a8d251cedd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/L-RAGE_3_PRYMMAL-ECE-7B-SLERP-V1/1762652579.703805",
- "retrieved_timestamp": "1762652579.703806",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1",
- "developer": "L-RAGE",
- "inference_platform": "unknown",
- "id": "L-RAGE/3_PRYMMAL-ECE-7B-SLERP-V1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27422572108671656
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.422793974567173
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10800604229607251
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28187919463087246
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3841354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29247007978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.0-7.8B-Instruct/97f7c73d-6d69-4c04-9cff-4914253003b0.json b/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.0-7.8B-Instruct/97f7c73d-6d69-4c04-9cff-4914253003b0.json
deleted file mode 100644
index 257ba699656c8cc7e1e17a3857928de51c6ab7d4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.0-7.8B-Instruct/97f7c73d-6d69-4c04-9cff-4914253003b0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.0-7.8B-Instruct/1762652579.705025",
- "retrieved_timestamp": "1762652579.705025",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct",
- "developer": "LGAI-EXAONE",
- "inference_platform": "unknown",
- "id": "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7192826145737754
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4174432647784512
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30438066465256797
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.366125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35771276595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "ExaoneForCausalLM",
- "params_billions": 7.8
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.5-2.4B-Instruct/e2a2d764-ba6b-450d-8f94-abf2af95e793.json b/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.5-2.4B-Instruct/e2a2d764-ba6b-450d-8f94-abf2af95e793.json
deleted file mode 100644
index 261b260a3da4f800e7f2047e4efb1c494153e82e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.5-2.4B-Instruct/e2a2d764-ba6b-450d-8f94-abf2af95e793.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.5-2.4B-Instruct/1762652579.705282",
- "retrieved_timestamp": "1762652579.7052832",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct",
- "developer": "LGAI-EXAONE",
- "inference_platform": "unknown",
- "id": "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7950449252428002
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4092347113723405
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3678247734138973
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.366125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32804188829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "ExaoneForCausalLM",
- "params_billions": 2.405
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.5-32B-Instruct/a172b1d1-6d6e-4cd9-9a85-78cb4f71661e.json b/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.5-32B-Instruct/a172b1d1-6d6e-4cd9-9a85-78cb4f71661e.json
deleted file mode 100644
index d41a58c59979a6f8ad8aa0235aee05d628e9edb3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.5-32B-Instruct/a172b1d1-6d6e-4cd9-9a85-78cb4f71661e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.5-32B-Instruct/1762652579.705488",
- "retrieved_timestamp": "1762652579.705489",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LGAI-EXAONE/EXAONE-3.5-32B-Instruct",
- "developer": "LGAI-EXAONE",
- "inference_platform": "unknown",
- "id": "LGAI-EXAONE/EXAONE-3.5-32B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8391833668000904
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5760913742720142
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5128398791540786
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38066666666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4636801861702128
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "ExaoneForCausalLM",
- "params_billions": 32.003
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.5-7.8B-Instruct/7fa474fb-4aa1-4855-9759-a28056c7a5e7.json b/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.5-7.8B-Instruct/7fa474fb-4aa1-4855-9759-a28056c7a5e7.json
deleted file mode 100644
index 18ba8467c0d2462d7fdfe598f484a36916f1ddbf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LGAI-EXAONE/LGAI-EXAONE_EXAONE-3.5-7.8B-Instruct/7fa474fb-4aa1-4855-9759-a28056c7a5e7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LGAI-EXAONE_EXAONE-3.5-7.8B-Instruct/1762652579.705873",
- "retrieved_timestamp": "1762652579.705875",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct",
- "developer": "LGAI-EXAONE",
- "inference_platform": "unknown",
- "id": "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8136045692096969
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4727592304359862
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47507552870090636
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3779375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4133144946808511
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "ExaoneForCausalLM",
- "params_billions": 7.818
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LLM360/LLM360_K2-Chat/f7e7c296-74f4-49fa-946d-142341749355.json b/leaderboard_data/HFOpenLLMv2/LLM360/LLM360_K2-Chat/f7e7c296-74f4-49fa-946d-142341749355.json
deleted file mode 100644
index 4f02e98bd16340be2e5831a8f959723c4b5daf7e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LLM360/LLM360_K2-Chat/f7e7c296-74f4-49fa-946d-142341749355.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LLM360_K2-Chat/1762652579.706591",
- "retrieved_timestamp": "1762652579.706592",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LLM360/K2-Chat",
- "developer": "LLM360",
- "inference_platform": "unknown",
- "id": "LLM360/K2-Chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5151763986223221
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5358099630242067
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10347432024169184
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.457
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3371010638297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 65.286
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LLM360/LLM360_K2/4b1e267f-90c4-403a-a7cd-5c006153408b.json b/leaderboard_data/HFOpenLLMv2/LLM360/LLM360_K2/4b1e267f-90c4-403a-a7cd-5c006153408b.json
deleted file mode 100644
index ab179cbfc24273bb707c09772436eeb975f5fd8f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LLM360/LLM360_K2/4b1e267f-90c4-403a-a7cd-5c006153408b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LLM360_K2/1762652579.706215",
- "retrieved_timestamp": "1762652579.7062159",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LLM360/K2",
- "developer": "LLM360",
- "inference_platform": "unknown",
- "id": "LLM360/K2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2252157608478836
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4971835676523677
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.027190332326283987
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27684563758389263
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39799999999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30044880319148937
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 65.286
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LLM4Binary/LLM4Binary_llm4decompile-1.3b-v2/86f0a81b-69da-4f36-a6b0-8a36f79d5c1c.json b/leaderboard_data/HFOpenLLMv2/LLM4Binary/LLM4Binary_llm4decompile-1.3b-v2/86f0a81b-69da-4f36-a6b0-8a36f79d5c1c.json
deleted file mode 100644
index a95191eddc57bbbf7702b21ceed1f5db1b31bc66..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LLM4Binary/LLM4Binary_llm4decompile-1.3b-v2/86f0a81b-69da-4f36-a6b0-8a36f79d5c1c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LLM4Binary_llm4decompile-1.3b-v2/1762652579.7068748",
- "retrieved_timestamp": "1762652579.706877",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LLM4Binary/llm4decompile-1.3b-v2",
- "developer": "LLM4Binary",
- "inference_platform": "unknown",
- "id": "LLM4Binary/llm4decompile-1.3b-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22678936333373229
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3271808417267589
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01283987915407855
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23573825503355705
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4071770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12092752659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.346
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Langboat/Langboat_Mengzi3-8B-Chat/13e12b5c-d3bb-4634-967d-e5741e623be1.json b/leaderboard_data/HFOpenLLMv2/Langboat/Langboat_Mengzi3-8B-Chat/13e12b5c-d3bb-4634-967d-e5741e623be1.json
deleted file mode 100644
index 21cc4135ea51d2e89dfe5ecfd7d08d37c261a2fa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Langboat/Langboat_Mengzi3-8B-Chat/13e12b5c-d3bb-4634-967d-e5741e623be1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Langboat_Mengzi3-8B-Chat/1762652579.707526",
- "retrieved_timestamp": "1762652579.707527",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Langboat/Mengzi3-8B-Chat",
- "developer": "Langboat",
- "inference_platform": "unknown",
- "id": "Langboat/Mengzi3-8B-Chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.513977357854936
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4683725003203179
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09063444108761329
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4077916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31416223404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBA100/745591e3-3c6a-473a-9e51-4bffe1c86fa7.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBA100/745591e3-3c6a-473a-9e51-4bffe1c86fa7.json
deleted file mode 100644
index 8146728c649ba1be9f789b4c59fbe370227b696c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBA100/745591e3-3c6a-473a-9e51-4bffe1c86fa7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lawnakk_BBA100/1762652579.707814",
- "retrieved_timestamp": "1762652579.707815",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lawnakk/BBA100",
- "developer": "Lawnakk",
- "inference_platform": "unknown",
- "id": "Lawnakk/BBA100"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2075803312987318
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2825701502983552
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.009818731117824773
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24412751677852348
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40196875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11220079787234043
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.0/61739e6e-92b0-4577-acd2-8c58ffc612a4.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.0/61739e6e-92b0-4577-acd2-8c58ffc612a4.json
deleted file mode 100644
index 00cefef81d6d08744f0e5a8490b4909b6e67d2fd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.0/61739e6e-92b0-4577-acd2-8c58ffc612a4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.0/1762652579.708328",
- "retrieved_timestamp": "1762652579.708329",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lawnakk/BBALAW1.0",
- "developer": "Lawnakk",
- "inference_platform": "unknown",
- "id": "Lawnakk/BBALAW1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13511482865463637
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28276697965906106
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2558724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3525729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11278257978723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 4.353
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.2/917081cc-ee33-4c1f-85b0-9256ef57f6b3.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.2/917081cc-ee33-4c1f-85b0-9256ef57f6b3.json
deleted file mode 100644
index 90250844c981d51342e5e1f2b7495de9c46db04f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.2/917081cc-ee33-4c1f-85b0-9256ef57f6b3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.2/1762652579.708597",
- "retrieved_timestamp": "1762652579.708598",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lawnakk/BBALAW1.2",
- "developer": "Lawnakk",
- "inference_platform": "unknown",
- "id": "Lawnakk/BBALAW1.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13543952268868825
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28112730419661675
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35790625000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11053856382978723
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 4.353
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.3/60fa19b9-bf1d-4f39-b421-cb59379f5206.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.3/60fa19b9-bf1d-4f39-b421-cb59379f5206.json
deleted file mode 100644
index 4dc5a8c3a44fcce3025d96652c2145c6c4efca6d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.3/60fa19b9-bf1d-4f39-b421-cb59379f5206.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.3/1762652579.70884",
- "retrieved_timestamp": "1762652579.7088408",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lawnakk/BBALAW1.3",
- "developer": "Lawnakk",
- "inference_platform": "unknown",
- "id": "Lawnakk/BBALAW1.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13543952268868825
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28269808045232453
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36190625000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.109375
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 4.353
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.6/684962b9-d734-4a10-a0cb-45bc4d957c2c.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.6/684962b9-d734-4a10-a0cb-45bc4d957c2c.json
deleted file mode 100644
index 12e48bcfb8c0c90973312cf3c0e2a975e67aa9ce..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.6/684962b9-d734-4a10-a0cb-45bc4d957c2c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.6/1762652579.7090619",
- "retrieved_timestamp": "1762652579.7090628",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lawnakk/BBALAW1.6",
- "developer": "Lawnakk",
- "inference_platform": "unknown",
- "id": "Lawnakk/BBALAW1.6"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5245437660961804
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.555356284691385
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36027190332326287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43684375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45071476063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.61/af87bb98-cc36-4c8d-9694-7e7428a899ac.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.61/af87bb98-cc36-4c8d-9694-7e7428a899ac.json
deleted file mode 100644
index 4805c16fd1f97b9e85c989a9649b37c273129f74..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.61/af87bb98-cc36-4c8d-9694-7e7428a899ac.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.61/1762652579.709277",
- "retrieved_timestamp": "1762652579.7092779",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lawnakk/BBALAW1.61",
- "developer": "Lawnakk",
- "inference_platform": "unknown",
- "id": "Lawnakk/BBALAW1.61"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5771253607095839
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5548582474785428
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36631419939577037
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31711409395973156
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4355104166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4470578457446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.62/5dc300f1-e908-4d71-addc-2717e3702b12.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.62/5dc300f1-e908-4d71-addc-2717e3702b12.json
deleted file mode 100644
index 61137551f62fc04076892ce09bd7756efbac24cb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.62/5dc300f1-e908-4d71-addc-2717e3702b12.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.62/1762652579.709492",
- "retrieved_timestamp": "1762652579.709493",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lawnakk/BBALAW1.62",
- "developer": "Lawnakk",
- "inference_platform": "unknown",
- "id": "Lawnakk/BBALAW1.62"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5046099903810778
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5580519941056026
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2824773413897281
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4343333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45445478723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.63/6005fc02-9f02-436a-a535-ec68a3c6dbc6.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.63/6005fc02-9f02-436a-a535-ec68a3c6dbc6.json
deleted file mode 100644
index 09812cd8e58198d6129a300d752a47d437de323d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.63/6005fc02-9f02-436a-a535-ec68a3c6dbc6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.63/1762652579.709696",
- "retrieved_timestamp": "1762652579.709697",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lawnakk/BBALAW1.63",
- "developer": "Lawnakk",
- "inference_platform": "unknown",
- "id": "Lawnakk/BBALAW1.63"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44073835201709244
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5540633758841665
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37009063444108764
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4303333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4470578457446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.64/4a4ce0f8-c41f-469e-b7c7-a4e3d857377e.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.64/4a4ce0f8-c41f-469e-b7c7-a4e3d857377e.json
deleted file mode 100644
index c0798c4c89b31281e618cbe4a3a468db57b0f8a0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1.64/4a4ce0f8-c41f-469e-b7c7-a4e3d857377e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1.64/1762652579.709901",
- "retrieved_timestamp": "1762652579.709902",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lawnakk/BBALAW1.64",
- "developer": "Lawnakk",
- "inference_platform": "unknown",
- "id": "Lawnakk/BBALAW1.64"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13946107439371977
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27790701865141654
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2483221476510067
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3446666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11153590425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1/59b40f56-c27f-4b15-9288-b7033e2e4f26.json b/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1/59b40f56-c27f-4b15-9288-b7033e2e4f26.json
deleted file mode 100644
index 5799c27666e905ad319b9ddff89e7e22e4c32b1b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lawnakk/Lawnakk_BBALAW1/59b40f56-c27f-4b15-9288-b7033e2e4f26.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lawnakk_BBALAW1/1762652579.708089",
- "retrieved_timestamp": "1762652579.70809",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lawnakk/BBALAW1",
- "developer": "Lawnakk",
- "inference_platform": "unknown",
- "id": "Lawnakk/BBALAW1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19054442213327305
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28723681696502185
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.009818731117824773
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24328859060402686
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4152708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11211768617021277
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_A/771366a5-e227-4ff8-b60f-744020994bec.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_A/771366a5-e227-4ff8-b60f-744020994bec.json
deleted file mode 100644
index 7de403a376fd00ffab50931b1de7fc13fd5c9dce..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_A/771366a5-e227-4ff8-b60f-744020994bec.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_A/1762652579.714355",
- "retrieved_timestamp": "1762652579.714355",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/CheckPoint_A",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/CheckPoint_A"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45127927233074905
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4747699745968042
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4230833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28798204787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_B/4e44fd55-9538-4065-8763-5d1c3d00be5d.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_B/4e44fd55-9538-4065-8763-5d1c3d00be5d.json
deleted file mode 100644
index e51118dbead63e64d5b8a20076a8d8a70f43498a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_B/4e44fd55-9538-4065-8763-5d1c3d00be5d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_B/1762652579.7146208",
- "retrieved_timestamp": "1762652579.714622",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/CheckPoint_B",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/CheckPoint_B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4439852923576111
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47799475378324896
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07175226586102719
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38984375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29072473404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_C/a4fe370d-1722-4fdf-bf75-8416baeaba19.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_C/a4fe370d-1722-4fdf-bf75-8416baeaba19.json
deleted file mode 100644
index c6971e6f6acc70a791207ee0ba3479271ac92a82..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_C/a4fe370d-1722-4fdf-bf75-8416baeaba19.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_C/1762652579.714836",
- "retrieved_timestamp": "1762652579.714837",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/CheckPoint_C",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/CheckPoint_C"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34768968558979063
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45864215446207585
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4346145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30211103723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_R1/7eba2aef-5c97-4526-92a8-d62bd5b59b6f.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_R1/7eba2aef-5c97-4526-92a8-d62bd5b59b6f.json
deleted file mode 100644
index c037bf164db7ba7ade973131927a0e90acbbba74..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_CheckPoint_R1/7eba2aef-5c97-4526-92a8-d62bd5b59b6f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_CheckPoint_R1/1762652579.715039",
- "retrieved_timestamp": "1762652579.71504",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/CheckPoint_R1",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/CheckPoint_R1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17278376928771216
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4225419506658359
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04305135951661632
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4031458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22049534574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_AI_001/f6b84bde-67aa-4c50-a46e-1f80605037de.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_AI_001/f6b84bde-67aa-4c50-a46e-1f80605037de.json
deleted file mode 100644
index b25a48e9a1acbdda935999992fe8e5e85cc73b95..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_AI_001/f6b84bde-67aa-4c50-a46e-1f80605037de.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_AI_001/1762652579.7152472",
- "retrieved_timestamp": "1762652579.715248",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/LCARS_AI_001",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/LCARS_AI_001"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31094495937445976
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42578875825590146
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.023413897280966767
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43836458333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2670378989361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_AI_1x4_003_SuperAI/db8614eb-2b53-460c-a80b-dceb47a9703f.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_AI_1x4_003_SuperAI/db8614eb-2b53-460c-a80b-dceb47a9703f.json
deleted file mode 100644
index 0aec10b89420ca88d0cca01f2417a10e1e360cae..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_AI_1x4_003_SuperAI/db8614eb-2b53-460c-a80b-dceb47a9703f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_AI_1x4_003_SuperAI/1762652579.7154438",
- "retrieved_timestamp": "1762652579.715445",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/LCARS_AI_1x4_003_SuperAI",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/LCARS_AI_1x4_003_SuperAI"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41111251479407973
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49198503573704794
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05740181268882175
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4506145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29720744680851063
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 24.154
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_AI_StarTrek_Computer/a3e19823-43ac-44ac-9dee-960a98139fa8.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_AI_StarTrek_Computer/a3e19823-43ac-44ac-9dee-960a98139fa8.json
deleted file mode 100644
index 6ccf5e323071ef7d5bcd78d8e86a908d549d4c09..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_AI_StarTrek_Computer/a3e19823-43ac-44ac-9dee-960a98139fa8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_AI_StarTrek_Computer/1762652579.7157388",
- "retrieved_timestamp": "1762652579.715741",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/LCARS_AI_StarTrek_Computer",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/LCARS_AI_StarTrek_Computer"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35825609383103496
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4446191188748297
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04078549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3950208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24584441489361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_TOP_SCORE/04631aa2-f1fd-4aea-ba88-53b474c71fe8.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_TOP_SCORE/04631aa2-f1fd-4aea-ba88-53b474c71fe8.json
deleted file mode 100644
index 9da6bb4508917395333c48d14a9a7e334e7adf73..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_LCARS_TOP_SCORE/04631aa2-f1fd-4aea-ba88-53b474c71fe8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_LCARS_TOP_SCORE/1762652579.716028",
- "retrieved_timestamp": "1762652579.716029",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/LCARS_TOP_SCORE",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/LCARS_TOP_SCORE"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43706587410293574
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5127371051825098
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06722054380664652
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2860738255033557
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42928125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3031083776595745
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_Mixtral_AI_SwahiliTron_7b/4f5fadb6-5fad-4b82-a027-1d4f497dc476.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_Mixtral_AI_SwahiliTron_7b/4f5fadb6-5fad-4b82-a027-1d4f497dc476.json
deleted file mode 100644
index f6458571cbf147905ee8705563a05c0f7c212996..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_Mixtral_AI_SwahiliTron_7b/4f5fadb6-5fad-4b82-a027-1d4f497dc476.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_Mixtral_AI_SwahiliTron_7b/1762652579.716297",
- "retrieved_timestamp": "1762652579.716299",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/Mixtral_AI_SwahiliTron_7b",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/Mixtral_AI_SwahiliTron_7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1533996462718919
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3055092453201354
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.013595166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34203125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12076130319148937
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWebAI_Human_AGI/8e1f811e-3e86-4440-a5dd-bf607aa02ad6.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWebAI_Human_AGI/8e1f811e-3e86-4440-a5dd-bf607aa02ad6.json
deleted file mode 100644
index 263b427554f3e48a57ef5579fa87f36d2621fb4b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWebAI_Human_AGI/8e1f811e-3e86-4440-a5dd-bf607aa02ad6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWebAI_Human_AGI/1762652579.7166212",
- "retrieved_timestamp": "1762652579.716622",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWebAI_Human_AGI",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWebAI_Human_AGI"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3388221031308041
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3374862127508733
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.014350453172205438
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39663541666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1478557180851064
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWebAI_Human_AGI_001/a4c9a905-1a7c-406a-ab38-6a5e71ed0bf5.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWebAI_Human_AGI_001/a4c9a905-1a7c-406a-ab38-6a5e71ed0bf5.json
deleted file mode 100644
index a7ef6fd804e74eed8e834e3216723a23a3a270df..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWebAI_Human_AGI_001/a4c9a905-1a7c-406a-ab38-6a5e71ed0bf5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWebAI_Human_AGI_001/1762652579.716855",
- "retrieved_timestamp": "1762652579.716856",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWebAI_Human_AGI_001",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWebAI_Human_AGI_001"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31181930610779396
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3433421938604874
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.019637462235649546
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39939583333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14261968085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_CyberTron_Ultra_7b/e8b992b8-9f0a-4bfb-ab53-3b07ca1ca117.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_CyberTron_Ultra_7b/e8b992b8-9f0a-4bfb-ab53-3b07ca1ca117.json
deleted file mode 100644
index 945c5e8bb1174d2f07709408284d108319f7862f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_CyberTron_Ultra_7b/e8b992b8-9f0a-4bfb-ab53-3b07ca1ca117.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_CyberTron_Ultra_7b/1762652579.71707",
- "retrieved_timestamp": "1762652579.717071",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_AI_CyberTron_Ultra_7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15557276914143361
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48107736108561827
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.013595166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41362499999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2865691489361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAGI_001_M2/daa704a9-2eed-4549-a847-3606c9e8a733.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAGI_001_M2/daa704a9-2eed-4549-a847-3606c9e8a733.json
deleted file mode 100644
index 8681a5aeadc15a5389bf88b4220098e8605dea42..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAGI_001_M2/daa704a9-2eed-4549-a847-3606c9e8a733.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAGI_001_M2/1762652579.71728",
- "retrieved_timestamp": "1762652579.717281",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_AI_HumanAGI_001_M2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39395138233221183
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4888172059118469
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03851963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4503020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.300531914893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAGI_002/3a6cfbae-80c1-4ec6-9c14-1ddeeb6e7138.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAGI_002/3a6cfbae-80c1-4ec6-9c14-1ddeeb6e7138.json
deleted file mode 100644
index 8aec5a3d9bf6e6ac01fb4974aa3378e459885095..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAGI_002/3a6cfbae-80c1-4ec6-9c14-1ddeeb6e7138.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAGI_002/1762652579.71767",
- "retrieved_timestamp": "1762652579.7176719",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_AI_HumanAGI_002",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_AI_HumanAGI_002"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40876430094371824
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5043871825389313
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48648958333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3058510638297872
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_001/f177b7f7-7143-4f72-9f9d-54fe2bc9797b.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_001/f177b7f7-7143-4f72-9f9d-54fe2bc9797b.json
deleted file mode 100644
index a2e7d3f1cd1735ffb0c4e82e0b6abcb2fc4c9ee6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_001/f177b7f7-7143-4f72-9f9d-54fe2bc9797b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_001/1762652579.717986",
- "retrieved_timestamp": "1762652579.717987",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_AI_HumanAI_001",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_AI_HumanAI_001"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22516589316347294
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33440360243051986
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01661631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28859060402684567
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38603125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1270777925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_006/cdbebbea-4749-472b-8cec-5da5ffa96d65.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_006/cdbebbea-4749-472b-8cec-5da5ffa96d65.json
deleted file mode 100644
index 8adb61d18c16d3fa1b15fad68ccc6d62a427e9f3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_006/cdbebbea-4749-472b-8cec-5da5ffa96d65.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_006/1762652579.718229",
- "retrieved_timestamp": "1762652579.71823",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_AI_HumanAI_006",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_AI_HumanAI_006"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14300832901146734
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3301800420981355
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.010574018126888218
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3567916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11353058510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_007/3143a635-10da-4cb5-9c2f-eae2988d9e60.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_007/3143a635-10da-4cb5-9c2f-eae2988d9e60.json
deleted file mode 100644
index f9e8ec64a0795422a9167822646d769649b36528..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_007/3143a635-10da-4cb5-9c2f-eae2988d9e60.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_007/1762652579.718461",
- "retrieved_timestamp": "1762652579.718461",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_AI_HumanAI_007",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_AI_HumanAI_007"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3351751131442351
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3415665794743605
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.022658610271903322
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28859060402684567
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40962499999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13522273936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_009_CHAT/a6d3b7b1-8834-4b74-8849-6d80381c46f5.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_009_CHAT/a6d3b7b1-8834-4b74-8849-6d80381c46f5.json
deleted file mode 100644
index 5fdb2920f524e2b4035075e27a62e4dfd19bf07f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_009_CHAT/a6d3b7b1-8834-4b74-8849-6d80381c46f5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_009_CHAT/1762652579.718692",
- "retrieved_timestamp": "1762652579.718693",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_AI_HumanAI_009_CHAT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2973310815303395
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3306728717792965
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01661631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1432845744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_010_CHAT/7f53cef7-fba6-4802-93a2-b54f82a32d74.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_010_CHAT/7f53cef7-fba6-4802-93a2-b54f82a32d74.json
deleted file mode 100644
index 6329d771b15e96cb55624cf95726ebb12046e796..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_010_CHAT/7f53cef7-fba6-4802-93a2-b54f82a32d74.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_010_CHAT/1762652579.7189271",
- "retrieved_timestamp": "1762652579.7189288",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_AI_HumanAI_010_CHAT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2506948230694557
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33363164762455844
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01812688821752266
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41371874999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14303523936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT/bc7bf4d0-45e9-4b37-8e5f-edc92fb1bd66.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT/bc7bf4d0-45e9-4b37-8e5f-edc92fb1bd66.json
deleted file mode 100644
index 373b05b481cd5c29cbcd4e5bc4a31ee393dd77af..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT/bc7bf4d0-45e9-4b37-8e5f-edc92fb1bd66.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT/1762652579.719242",
- "retrieved_timestamp": "1762652579.719243",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3148667757106699
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3522609512356862
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.014350453172205438
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3831458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15949135638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/fbd83964-530c-4d0e-a305-9f8451affb23.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/fbd83964-530c-4d0e-a305-9f8451affb23.json
deleted file mode 100644
index e061856aec1fa81f4166d2108319bc2f9eee562e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/fbd83964-530c-4d0e-a305-9f8451affb23.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML/1762652579.719551",
- "retrieved_timestamp": "1762652579.719552",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37524213531208306
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39840187861283577
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0256797583081571
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42391666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2018783244680851
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/10d76569-edca-47db-abf2-1d0fd73df198.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/10d76569-edca-47db-abf2-1d0fd73df198.json
deleted file mode 100644
index b6e9a467c1e692293479adddf3b8b2bc22049f53..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/10d76569-edca-47db-abf2-1d0fd73df198.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1/1762652579.7198021",
- "retrieved_timestamp": "1762652579.7198029",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_AI_HumanAI_011_INSTRUCT_ML_r1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4049677079039171
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48583341042911066
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3921354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2956283244680851
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/431f8459-3c12-4260-a158-c58ec910590d.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/431f8459-3c12-4260-a158-c58ec910590d.json
deleted file mode 100644
index 49ec3a51480c21b1e60aed017c8b234f2b3a41ef..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/431f8459-3c12-4260-a158-c58ec910590d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/1762652579.720226",
- "retrieved_timestamp": "1762652579.720227",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30664858131978706
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45768864760562744
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0445619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42540625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23179853723404256
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/bcd8c141-d286-4567-bb06-934e546a5c7c.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/bcd8c141-d286-4567-bb06-934e546a5c7c.json
deleted file mode 100644
index 8378dc34129ad99920482f6424adc3551a490e3c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/bcd8c141-d286-4567-bb06-934e546a5c7c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_IA/1762652579.720018",
- "retrieved_timestamp": "1762652579.7200189",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_IA"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30355124403250044
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4575107149412439
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0445619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42534374999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23287898936170212
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/9cc77018-d090-4202-bcf5-d0031097b84e.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/9cc77018-d090-4202-bcf5-d0031097b84e.json
deleted file mode 100644
index a155f454f3a8678545ead5a3900ce9c10c6e9b63..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/9cc77018-d090-4202-bcf5-d0031097b84e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_MX/1762652579.7204201",
- "retrieved_timestamp": "1762652579.720421",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_MX"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3065987136353764
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3158421938604874
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015105740181268883
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34438541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11070478723404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/0b365c44-3cc2-4149-8614-7de6b6c2581d.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/0b365c44-3cc2-4149-8614-7de6b6c2581d.json
deleted file mode 100644
index 56fc9fd4a3d26997e6ba676f98d9ebce6cbc5684..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/0b365c44-3cc2-4149-8614-7de6b6c2581d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/1762652579.72064",
- "retrieved_timestamp": "1762652579.7206411",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35788153211257245
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4476544560399054
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04229607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41340625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23761635638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/dc90b971-313a-4a76-b042-350adf37a43c.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/dc90b971-313a-4a76-b042-350adf37a43c.json
deleted file mode 100644
index 21577c849a3bd847880bc8bded5a033270c8805b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/dc90b971-313a-4a76-b042-350adf37a43c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_012_INSTRUCT_XA/1762652579.720855",
- "retrieved_timestamp": "1762652579.720855",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_AI_HumanAI_012_INSTRUCT_XA"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37976347203198624
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44827466097749213
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04003021148036254
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4148020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2388630319148936
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_RP/a4a38b96-036f-40db-8a0b-024a36f004f5.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_RP/a4a38b96-036f-40db-8a0b-024a36f004f5.json
deleted file mode 100644
index a674eb7083fa600436c83c75cf3030e0330983c8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_RP/a4a38b96-036f-40db-8a0b-024a36f004f5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_RP/1762652579.721039",
- "retrieved_timestamp": "1762652579.7210398",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_AI_HumanAI_RP",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_AI_HumanAI_RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2541168543907942
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33230179059744286
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01283987915407855
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3882604166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1323969414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_TextVision/558a0ed7-a667-421e-bbab-094b46274239.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_TextVision/558a0ed7-a667-421e-bbab-094b46274239.json
deleted file mode 100644
index 29293bcfb6b01b7f105f9086081d826bf6280d2a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_AI_HumanAI_TextVision/558a0ed7-a667-421e-bbab-094b46274239.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_AI_HumanAI_TextVision/1762652579.7212439",
- "retrieved_timestamp": "1762652579.7212448",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_AI_HumanAI_TextVision",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_AI_HumanAI_TextVision"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062740196013245
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33536617928965984
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.014350453172205438
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39384375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13871343085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_HumanAI_M1/ee856df0-01ea-4f06-9323-951144c9e82f.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_HumanAI_M1/ee856df0-01ea-4f06-9323-951144c9e82f.json
deleted file mode 100644
index 764f11cd83e516b0575f9628c54f39c16a879d40..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_HumanAI_M1/ee856df0-01ea-4f06-9323-951144c9e82f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_HumanAI_M1/1762652579.721453",
- "retrieved_timestamp": "1762652579.721453",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_HumanAI_M1",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_HumanAI_M1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3582062261466243
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35632705798398107
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.024924471299093656
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36711458333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1663065159574468
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_HumanAI_M2/4ea0436d-6ec9-40db-af56-2f7f1b0317df.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_HumanAI_M2/4ea0436d-6ec9-40db-af56-2f7f1b0317df.json
deleted file mode 100644
index 65bd79fbbd2e41c157ea40638fcb4bc95a44b0d6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_HumanAI_M2/4ea0436d-6ec9-40db-af56-2f7f1b0317df.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_HumanAI_M2/1762652579.7216609",
- "retrieved_timestamp": "1762652579.721662",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_HumanAI_M2",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_HumanAI_M2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3750171766468526
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39308772552915555
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.028700906344410877
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3751458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2010472074468085
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_HumanAI_M3/d5dd0be3-e7a7-4636-b513-3c1d5532807f.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_HumanAI_M3/d5dd0be3-e7a7-4636-b513-3c1d5532807f.json
deleted file mode 100644
index a93e513f28a5d8519ea0eb8e34cf952196f6ab27..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer_SpydazWeb_HumanAI_M3/d5dd0be3-e7a7-4636-b513-3c1d5532807f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer_SpydazWeb_HumanAI_M3/1762652579.721856",
- "retrieved_timestamp": "1762652579.721857",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/SpydazWeb_HumanAI_M3",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/SpydazWeb_HumanAI_M3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1578711153073844
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31272572546166244
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.00906344410876133
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3914270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11486037234042554
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_12/b4b57280-49db-4a07-929f-dbe2f222250c.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_12/b4b57280-49db-4a07-929f-dbe2f222250c.json
deleted file mode 100644
index b9d90ecfc6e32d202e4290d1db45d44395280484..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_12/b4b57280-49db-4a07-929f-dbe2f222250c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_12/1762652579.722054",
- "retrieved_timestamp": "1762652579.722055",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_12",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_12"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2764985793250797
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31633960292107943
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.013595166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35815624999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11369680851063829
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_14/6233aac6-0ce3-4f3c-8ee0-87d2482d3ea2.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_14/6233aac6-0ce3-4f3c-8ee0-87d2482d3ea2.json
deleted file mode 100644
index ffb71597de3ac296fbfb1225c506ba9a3a30ad25..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_14/6233aac6-0ce3-4f3c-8ee0-87d2482d3ea2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_14/1762652579.722256",
- "retrieved_timestamp": "1762652579.722257",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_14",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_14"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1811770546594148
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2988848127354542
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.012084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3395208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11394614361702128
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_001/51d4724b-c85c-4ad4-a4bd-9be93cd99a2a.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_001/51d4724b-c85c-4ad4-a4bd-9be93cd99a2a.json
deleted file mode 100644
index a4a80808ffc4af6f2d8d05dbd51ca8d55d9b696a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_001/51d4724b-c85c-4ad4-a4bd-9be93cd99a2a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_001/1762652579.72245",
- "retrieved_timestamp": "1762652579.722451",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_001",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_001"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4505046609662362
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4609124425176902
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0634441087613293
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42559375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2734375
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_002/86e8ff02-0dd2-4023-ab18-359d24a8a4fd.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_002/86e8ff02-0dd2-4023-ab18-359d24a8a4fd.json
deleted file mode 100644
index 962a21159da06680d158574e9850943971ddf598..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_002/86e8ff02-0dd2-4023-ab18-359d24a8a4fd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_002/1762652579.7226508",
- "retrieved_timestamp": "1762652579.7226508",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_002",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_002"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5306885729863429
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4682582050072746
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0581570996978852
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42546875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28939494680851063
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_MUSR/285688d5-c7ad-437b-a54c-9e6108d85267.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_MUSR/285688d5-c7ad-437b-a54c-9e6108d85267.json
deleted file mode 100644
index b4eb63e3060756d8e946cfce79b2014aa64cd7de..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_MUSR/285688d5-c7ad-437b-a54c-9e6108d85267.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_MUSR/1762652579.722848",
- "retrieved_timestamp": "1762652579.7228491",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MUSR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.478606763387811
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4671769411194033
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48689583333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2828291223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_MasterCoder/85ce2909-a5f9-413a-8719-cd0a66874535.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_MasterCoder/85ce2909-a5f9-413a-8719-cd0a66874535.json
deleted file mode 100644
index d62a462cfaa12b8eb914dc4c9396938e81633bbc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_MasterCoder/85ce2909-a5f9-413a-8719-cd0a66874535.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_MasterCoder/1762652579.723048",
- "retrieved_timestamp": "1762652579.723048",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_MasterCoder"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.414259719765777
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4689417813020516
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06117824773413897
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47197916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27194148936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_001/8a7df636-f1bb-4a74-bb7f-8a412edf6bd1.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_001/8a7df636-f1bb-4a74-bb7f-8a412edf6bd1.json
deleted file mode 100644
index 60608b24da32629eff454f22e6fa8e3cd1a559c9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_001/8a7df636-f1bb-4a74-bb7f-8a412edf6bd1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_001/1762652579.723258",
- "retrieved_timestamp": "1762652579.723258",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_001"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4571492528712705
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48178882135920675
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06948640483383686
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27684563758389263
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47784375000000007
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2681183510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_003/79336acd-d465-4938-af7f-f7a688f46fd4.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_003/79336acd-d465-4938-af7f-f7a688f46fd4.json
deleted file mode 100644
index c6f0eb77c8dd55b878bfecb1cecc1b83393b2698..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_003/79336acd-d465-4938-af7f-f7a688f46fd4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_003/1762652579.723467",
- "retrieved_timestamp": "1762652579.723468",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_003"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6200148938150774
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4755509035158693
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06948640483383686
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42019791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29986702127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/ed000ee0-4193-46c4-8114-2ea3dbfec9f7.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/ed000ee0-4193-46c4-8114-2ea3dbfec9f7.json
deleted file mode 100644
index d58852002bc1a559b272341486119e45150acf46..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/ed000ee0-4193-46c4-8114-2ea3dbfec9f7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent/1762652579.7236722",
- "retrieved_timestamp": "1762652579.7236722",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_AdvancedStudent"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5950854842927876
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4927473238025393
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.054380664652567974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5198229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2999501329787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Student/89f92d24-19c1-4021-819d-9c7ed717046c.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Student/89f92d24-19c1-4021-819d-9c7ed717046c.json
deleted file mode 100644
index 88b3dd72bf44d431420c7d7dccbdd5ac15557c47..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Student/89f92d24-19c1-4021-819d-9c7ed717046c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Student/1762652579.723874",
- "retrieved_timestamp": "1762652579.723874",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Student"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5735781060918363
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48808115770970123
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.50975
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.292719414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Teacher/24fa44cb-86d9-4e67-be8f-42f7fc574d52.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Teacher/24fa44cb-86d9-4e67-be8f-42f7fc574d52.json
deleted file mode 100644
index 703f7effeafc32ae39bbd77145b6cbdf001f8f4b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Teacher/24fa44cb-86d9-4e67-be8f-42f7fc574d52.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Math_Teacher/1762652579.7241092",
- "retrieved_timestamp": "1762652579.7241101",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Math_Teacher"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5772250960784053
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4805094960871836
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.054380664652567974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2860738255033557
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5222395833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2956283244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_001/b13652e3-43f1-4670-94f7-1a0bbf622f33.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_001/b13652e3-43f1-4670-94f7-1a0bbf622f33.json
deleted file mode 100644
index 0ecf40f5dd591cdc2141eda4537e1833047e799f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_001/b13652e3-43f1-4670-94f7-1a0bbf622f33.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_001/1762652579.72431",
- "retrieved_timestamp": "1762652579.724311",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_001"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5817963004827191
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4907982146977475
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05060422960725076
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4486041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29055851063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_002/8201723e-92fb-4207-afa8-df7db794c889.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_002/8201723e-92fb-4207-afa8-df7db794c889.json
deleted file mode 100644
index 4d5bb7c8030a39c71baac5e3b4301e1fe3f6246a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_002/8201723e-92fb-4207-afa8-df7db794c889.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_002/1762652579.7245262",
- "retrieved_timestamp": "1762652579.7245262",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_002"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.546150879665953
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4655028607746287
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04984894259818731
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45108333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28665226063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Coder/e166fa17-c285-466e-ab2e-1eb106ebd271.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Coder/e166fa17-c285-466e-ab2e-1eb106ebd271.json
deleted file mode 100644
index 7e36131297ac1f226585c4a87bb750dd9563fda9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Coder/e166fa17-c285-466e-ab2e-1eb106ebd271.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Coder/1762652579.724742",
- "retrieved_timestamp": "1762652579.724742",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Coder"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4923702442851634
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46376531085099754
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.054380664652567974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5624583333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28897938829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Math/983323f2-7caa-42cb-8838-8ea041303a70.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Math/983323f2-7caa-42cb-8838-8ea041303a70.json
deleted file mode 100644
index af1ee1558fc6274e1c9537a1a6afdbef412db740..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Math/983323f2-7caa-42cb-8838-8ea041303a70.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_Math/1762652579.7249558",
- "retrieved_timestamp": "1762652579.724957",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_Math"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5033112142448702
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4676503002757066
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04758308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4325729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29130651595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_MathMaster/a79378f7-01b3-4bf0-8b76-2e670d2a7366.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_MathMaster/a79378f7-01b3-4bf0-8b76-2e670d2a7366.json
deleted file mode 100644
index 577f533c5aa37e1cde866ade78e86ad2335a1b30..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_MathMaster/a79378f7-01b3-4bf0-8b76-2e670d2a7366.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_OmG_MathMaster/1762652579.7251709",
- "retrieved_timestamp": "1762652579.7251709",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_OmG_MathMaster"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5558429411738631
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47422312505675873
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05362537764350453
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45098958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2672041223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Student_Coder/1e7531fc-9f12-4c7c-8bf5-44511c37c23b.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Student_Coder/1e7531fc-9f12-4c7c-8bf5-44511c37c23b.json
deleted file mode 100644
index 96e6f8469f1088b80d6ee5f245ba9e4f0c65a1de..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Student_Coder/1e7531fc-9f12-4c7c-8bf5-44511c37c23b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Student_Coder/1762652579.725384",
- "retrieved_timestamp": "1762652579.725385",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Student_Coder"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5449518388985669
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4650844324968853
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06570996978851963
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43883333333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27684507978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Teacher_Coder/64c0088b-f9e7-4a9a-b449-3e1b514370ff.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Teacher_Coder/64c0088b-f9e7-4a9a-b449-3e1b514370ff.json
deleted file mode 100644
index 4a003e76295bc168a266b0acff23311005840d53..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Teacher_Coder/64c0088b-f9e7-4a9a-b449-3e1b514370ff.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Teacher_Coder/1762652579.7256",
- "retrieved_timestamp": "1762652579.725601",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Teacher_Coder"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5081572449988254
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47965526444811907
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0649546827794562
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4338125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28449135638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Top_Student/d652c8f6-d5b4-482f-91c7-5eb9529765c1.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Top_Student/d652c8f6-d5b4-482f-91c7-5eb9529765c1.json
deleted file mode 100644
index b2e272a297b01a2fbbf11e4f9eddfbdaded28090..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_Top_Student/d652c8f6-d5b4-482f-91c7-5eb9529765c1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_Top_Student/1762652579.725811",
- "retrieved_timestamp": "1762652579.725811",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_Top_Student"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6039530667517742
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49877449828070924
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07250755287009064
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5397916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30244348404255317
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_X1/7c72e837-92fd-4f3b-9c4f-205ffc93ac70.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_X1/7c72e837-92fd-4f3b-9c4f-205ffc93ac70.json
deleted file mode 100644
index 48ee4f7d4d318c0436820acfbada8c9c2299a83e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_X1/7c72e837-92fd-4f3b-9c4f-205ffc93ac70.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_X1/1762652579.7260191",
- "retrieved_timestamp": "1762652579.72602",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.427323944910615
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47589342126093026
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05664652567975831
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4231770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2890625
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_X2/169fe3b3-527a-408f-9442-5bc3616cc320.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_X2/169fe3b3-527a-408f-9442-5bc3616cc320.json
deleted file mode 100644
index 1ce8b240c0cc19077c7e03a0c5b3fcbdd713c03c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_R1_X2/169fe3b3-527a-408f-9442-5bc3616cc320.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_R1_X2/1762652579.7262201",
- "retrieved_timestamp": "1762652579.726221",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_R1_X2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5433782364127182
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4785559277736029
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06117824773413897
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46953125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29205452127659576
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_RP_R1/fd4405cf-9849-4606-a01c-a20459198853.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_RP_R1/fd4405cf-9849-4606-a01c-a20459198853.json
deleted file mode 100644
index b572d74c3db88a25ecf1db854eea6a7ea7ce54be..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_AGI_RP_R1/fd4405cf-9849-4606-a01c-a20459198853.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_AGI_RP_R1/1762652579.726439",
- "retrieved_timestamp": "1762652579.72644",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_AGI_RP_R1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5426036250482054
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4701061648636955
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42013541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28939494680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_BIBLE_002/060f29d1-8b1d-4651-808d-b1419bd76cd9.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_BIBLE_002/060f29d1-8b1d-4651-808d-b1419bd76cd9.json
deleted file mode 100644
index face45a369d427553c0af681116befec2257fe47..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_BIBLE_002/060f29d1-8b1d-4651-808d-b1419bd76cd9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_BIBLE_002/1762652579.72666",
- "retrieved_timestamp": "1762652579.7266612",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_BIBLE_002",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_BIBLE_002"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21949538336059432
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3289070186514165
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.017371601208459216
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34069791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13680186170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_ChatML_002/07981f28-b019-42f8-b14b-44ab73ebaa0a.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_ChatML_002/07981f28-b019-42f8-b14b-44ab73ebaa0a.json
deleted file mode 100644
index abb87a72ea478eefc4a423c4e12af2b1f5e82e96..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_ChatML_002/07981f28-b019-42f8-b14b-44ab73ebaa0a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_ChatML_002/1762652579.7268748",
- "retrieved_timestamp": "1762652579.726876",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_ChatML_002",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_ChatML_002"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24122772022677608
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3106383598957094
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.011329305135951661
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3623125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10945811170212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_ChatQA/4e72d3b7-4ebb-470d-8f86-66d6cb28095f.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_ChatQA/4e72d3b7-4ebb-470d-8f86-66d6cb28095f.json
deleted file mode 100644
index 04e94baff056599ad8a51ee6df4ccb5c8f33911f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_ChatQA/4e72d3b7-4ebb-470d-8f86-66d6cb28095f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_ChatQA/1762652579.727107",
- "retrieved_timestamp": "1762652579.727108",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_ChatQA",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_ChatQA"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1414591062824417
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32359493837413505
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.009818731117824773
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3447291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14752327127659576
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_ChatQA_003/471aac2a-5c4b-4b1b-a56b-490fafc444d8.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_ChatQA_003/471aac2a-5c4b-4b1b-a56b-490fafc444d8.json
deleted file mode 100644
index 14a58bd00cdfd509b0d8e54d6282e619635d0b69..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_ChatQA_003/471aac2a-5c4b-4b1b-a56b-490fafc444d8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_ChatQA_003/1762652579.727351",
- "retrieved_timestamp": "1762652579.7273521",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_ChatQA_003",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_ChatQA_003"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22091938279321088
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3171811407815537
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.010574018126888218
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38184375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11328125
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_TEMP_/f44f513c-0814-4f3b-94a4-9e28318da40e.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_TEMP_/f44f513c-0814-4f3b-94a4-9e28318da40e.json
deleted file mode 100644
index 8c5d3b3697ff75a485ddb90e620ab4293f45e63d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_TEMP_/f44f513c-0814-4f3b-94a4-9e28318da40e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_TEMP_/1762652579.7275891",
- "retrieved_timestamp": "1762652579.7275898",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_TEMP_",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_TEMP_"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47953097780555587
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.495695749059555
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12386706948640483
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42175
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3120844414893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_Top_Teacher_/a4beba0f-b860-4d7d-b1c3-0f569ba59171.json b/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_Top_Teacher_/a4beba0f-b860-4d7d-b1c3-0f569ba59171.json
deleted file mode 100644
index af98c2e3d5a9792c3795fd0c3b8dc70c8492fcbb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LeroyDyer/LeroyDyer__Spydaz_Web_AI_Top_Teacher_/a4beba0f-b860-4d7d-b1c3-0f569ba59171.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LeroyDyer__Spydaz_Web_AI_Top_Teacher_/1762652579.728002",
- "retrieved_timestamp": "1762652579.728004",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LeroyDyer/_Spydaz_Web_AI_Top_Teacher_",
- "developer": "LeroyDyer",
- "inference_platform": "unknown",
- "id": "LeroyDyer/_Spydaz_Web_AI_Top_Teacher_"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44038817005545283
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48909617780536035
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11555891238670694
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4366041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3149933510638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LightningRodLabs/LightningRodLabs_Flashlight-v1.0/cd4408c3-d966-4195-bcf2-5bc80eca1501.json b/leaderboard_data/HFOpenLLMv2/LightningRodLabs/LightningRodLabs_Flashlight-v1.0/cd4408c3-d966-4195-bcf2-5bc80eca1501.json
deleted file mode 100644
index 8cc529ce4a3f4cb635343f7a415d521d3698fffe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LightningRodLabs/LightningRodLabs_Flashlight-v1.0/cd4408c3-d966-4195-bcf2-5bc80eca1501.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LightningRodLabs_Flashlight-v1.0/1762652579.7282822",
- "retrieved_timestamp": "1762652579.728283",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LightningRodLabs/Flashlight-v1.0",
- "developer": "LightningRodLabs",
- "inference_platform": "unknown",
- "id": "LightningRodLabs/Flashlight-v1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6745446526327921
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6876833310149727
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49697885196374625
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3422818791946309
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41009375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5402260638297872
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LightningRodLabs/LightningRodLabs_Flashlight-v1.1/64c75370-981d-43ae-9823-d4fb0696d468.json b/leaderboard_data/HFOpenLLMv2/LightningRodLabs/LightningRodLabs_Flashlight-v1.1/64c75370-981d-43ae-9823-d4fb0696d468.json
deleted file mode 100644
index 0e4b603705f422bf366f7dab0bdfc330371dcf55..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LightningRodLabs/LightningRodLabs_Flashlight-v1.1/64c75370-981d-43ae-9823-d4fb0696d468.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LightningRodLabs_Flashlight-v1.1/1762652579.728596",
- "retrieved_timestamp": "1762652579.728597",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LightningRodLabs/Flashlight-v1.1",
- "developer": "LightningRodLabs",
- "inference_platform": "unknown",
- "id": "LightningRodLabs/Flashlight-v1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6720967034136092
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6901141327534415
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5324773413897281
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33976510067114096
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4047604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5415558510638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LightningRodLabs/LightningRodLabs_Flashlight-v1.2/404afbae-0393-48e6-874c-e1cb28e9a1eb.json b/leaderboard_data/HFOpenLLMv2/LightningRodLabs/LightningRodLabs_Flashlight-v1.2/404afbae-0393-48e6-874c-e1cb28e9a1eb.json
deleted file mode 100644
index e4fb3456449485ee7f65475b0257ca55a4ba1454..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LightningRodLabs/LightningRodLabs_Flashlight-v1.2/404afbae-0393-48e6-874c-e1cb28e9a1eb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LightningRodLabs_Flashlight-v1.2/1762652579.728818",
- "retrieved_timestamp": "1762652579.728819",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LightningRodLabs/Flashlight-v1.2",
- "developer": "LightningRodLabs",
- "inference_platform": "unknown",
- "id": "LightningRodLabs/Flashlight-v1.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4359920566319587
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3264526807518731
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1555891238670695
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23573825503355705
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45536458333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24850398936170212
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V1/d53a7070-911a-4a5e-ba0c-766c4f39b3f5.json b/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V1/d53a7070-911a-4a5e-ba0c-766c4f39b3f5.json
deleted file mode 100644
index e942db1002d0613d4a765cfbfe0e45a52242e989..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V1/d53a7070-911a-4a5e-ba0c-766c4f39b3f5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V1/1762652579.7290292",
- "retrieved_timestamp": "1762652579.72903",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1",
- "developer": "Lil-R",
- "inference_platform": "unknown",
- "id": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5823459531820016
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4287069505821554
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09138972809667674
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43746875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2677859042553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V2/25368664-1f32-4d69-9afc-91d58efd01e2.json b/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V2/25368664-1f32-4d69-9afc-91d58efd01e2.json
deleted file mode 100644
index c7153e4d8cc73e0e2132dcb946c1b4f4b2054c3b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V2/25368664-1f32-4d69-9afc-91d58efd01e2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-2B-SLERP-V2/1762652579.729285",
- "retrieved_timestamp": "1762652579.729285",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2",
- "developer": "Lil-R",
- "inference_platform": "unknown",
- "id": "Lil-R/2_PRYMMAL-ECE-2B-SLERP-V2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5542693386880144
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43764741906109417
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09441087613293052
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44816666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2744348404255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V1/dcadbfb3-fbeb-4108-bc27-7ccfc7ba1e3a.json b/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V1/dcadbfb3-fbeb-4108-bc27-7ccfc7ba1e3a.json
deleted file mode 100644
index 4c2b872ed4fb08c3e487bf36e812faac312891ae..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V1/dcadbfb3-fbeb-4108-bc27-7ccfc7ba1e3a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V1/1762652579.7297568",
- "retrieved_timestamp": "1762652579.7297568",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1",
- "developer": "Lil-R",
- "inference_platform": "unknown",
- "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10733742026711349
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30525797550329686
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0007552870090634441
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25083892617449666
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3910833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11236702127659574
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V2/41c47381-66d5-4d3a-8bfb-4269cb882385.json b/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V2/41c47381-66d5-4d3a-8bfb-4269cb882385.json
deleted file mode 100644
index 2a4c35a11ae53745193fee295ffcaedde2cc98cf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V2/41c47381-66d5-4d3a-8bfb-4269cb882385.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V2/1762652579.729984",
- "retrieved_timestamp": "1762652579.729985",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2",
- "developer": "Lil-R",
- "inference_platform": "unknown",
- "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10733742026711349
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30525797550329686
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0007552870090634441
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25083892617449666
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3910833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11236702127659574
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V3/0c21359f-8f0b-44a8-813e-a5f612f13658.json b/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V3/0c21359f-8f0b-44a8-813e-a5f612f13658.json
deleted file mode 100644
index 1067477e2716da340c031f7e814593eea10496d3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V3/0c21359f-8f0b-44a8-813e-a5f612f13658.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP-V3/1762652579.730203",
- "retrieved_timestamp": "1762652579.730203",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3",
- "developer": "Lil-R",
- "inference_platform": "unknown",
- "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP-V3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22346706738121516
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.357839880712804
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.006042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25671140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4107083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18168218085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP/aa396cb3-10aa-4777-a185-fcb38ffc5ec3.json b/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP/aa396cb3-10aa-4777-a185-fcb38ffc5ec3.json
deleted file mode 100644
index f87fcf58b3a740d17e12d4f0534a3008922243e1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_2_PRYMMAL-ECE-7B-SLERP/aa396cb3-10aa-4777-a185-fcb38ffc5ec3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lil-R_2_PRYMMAL-ECE-7B-SLERP/1762652579.7294989",
- "retrieved_timestamp": "1762652579.7294998",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lil-R/2_PRYMMAL-ECE-7B-SLERP",
- "developer": "Lil-R",
- "inference_platform": "unknown",
- "id": "Lil-R/2_PRYMMAL-ECE-7B-SLERP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5577412376937636
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5556642048146725
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3632930513595166
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43960416666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45071476063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_PRYMMAL-ECE-1B-SLERP-V1/a863e655-ee86-4f39-ae1a-0a65992f7eb4.json b/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_PRYMMAL-ECE-1B-SLERP-V1/a863e655-ee86-4f39-ae1a-0a65992f7eb4.json
deleted file mode 100644
index 97184e57e16153c3c37faff35b871316c746a80d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_PRYMMAL-ECE-1B-SLERP-V1/a863e655-ee86-4f39-ae1a-0a65992f7eb4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lil-R_PRYMMAL-ECE-1B-SLERP-V1/1762652579.7304142",
- "retrieved_timestamp": "1762652579.730415",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lil-R/PRYMMAL-ECE-1B-SLERP-V1",
- "developer": "Lil-R",
- "inference_platform": "unknown",
- "id": "Lil-R/PRYMMAL-ECE-1B-SLERP-V1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2874395492847866
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41904526564708194
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10347432024169184
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39743749999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2925531914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_PRYMMAL-ECE-7B-SLERP-V8/6a81c514-57b9-4a45-9a1a-0378e7554d04.json b/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_PRYMMAL-ECE-7B-SLERP-V8/6a81c514-57b9-4a45-9a1a-0378e7554d04.json
deleted file mode 100644
index 480fc05d2fa3b09f04b51751eec1effa2d523da7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lil-R/Lil-R_PRYMMAL-ECE-7B-SLERP-V8/6a81c514-57b9-4a45-9a1a-0378e7554d04.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lil-R_PRYMMAL-ECE-7B-SLERP-V8/1762652579.7306318",
- "retrieved_timestamp": "1762652579.730633",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lil-R/PRYMMAL-ECE-7B-SLERP-V8",
- "developer": "Lil-R",
- "inference_platform": "unknown",
- "id": "Lil-R/PRYMMAL-ECE-7B-SLERP-V8"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1258471965495995
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2955092966258663
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.009818731117824773
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36314583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11278257978723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_10PRYMMAL-3B-slerp/e9371530-675d-48d1-9145-7ea15c893833.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_10PRYMMAL-3B-slerp/e9371530-675d-48d1-9145-7ea15c893833.json
deleted file mode 100644
index 4ccb0fe089fdafea2ac5c4de68ad55dca6e7e780..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_10PRYMMAL-3B-slerp/e9371530-675d-48d1-9145-7ea15c893833.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LilRg_10PRYMMAL-3B-slerp/1762652579.7308428",
- "retrieved_timestamp": "1762652579.7308428",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LilRg/10PRYMMAL-3B-slerp",
- "developer": "LilRg",
- "inference_platform": "unknown",
- "id": "LilRg/10PRYMMAL-3B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1945903535951276
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5320377091634505
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14954682779456194
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45290625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3881316489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 3.821
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_ECE-1B-merge-PRYMMAL/3fefac8e-d5aa-4998-ab60-6e3dcc49f77f.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_ECE-1B-merge-PRYMMAL/3fefac8e-d5aa-4998-ab60-6e3dcc49f77f.json
deleted file mode 100644
index f2f2261079f99786967bf4af7b8b8d55433abe06..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_ECE-1B-merge-PRYMMAL/3fefac8e-d5aa-4998-ab60-6e3dcc49f77f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LilRg_ECE-1B-merge-PRYMMAL/1762652579.7310941",
- "retrieved_timestamp": "1762652579.731095",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LilRg/ECE-1B-merge-PRYMMAL",
- "developer": "LilRg",
- "inference_platform": "unknown",
- "id": "LilRg/ECE-1B-merge-PRYMMAL"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27122811916825135
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42345600176908743
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10120845921450151
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3801041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2906416223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_ECE_Finetunning/f20fd926-d690-4fe2-80a4-3e79dc37f03f.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_ECE_Finetunning/f20fd926-d690-4fe2-80a4-3e79dc37f03f.json
deleted file mode 100644
index 1b0c45574a0b6405ec001a7fc12835464ac7cccd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_ECE_Finetunning/f20fd926-d690-4fe2-80a4-3e79dc37f03f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LilRg_ECE_Finetunning/1762652579.731307",
- "retrieved_timestamp": "1762652579.731308",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LilRg/ECE_Finetunning",
- "developer": "LilRg",
- "inference_platform": "unknown",
- "id": "LilRg/ECE_Finetunning"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04453849120334047
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47321596790730514
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.045317220543806644
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38394791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3191489361702128
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "?",
- "params_billions": 16.061
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-6B-slerp/8fedde0a-96fe-4a6f-9e0f-87832cfd418e.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-6B-slerp/8fedde0a-96fe-4a6f-9e0f-87832cfd418e.json
deleted file mode 100644
index dfa2ca042424a09593dc4a5db5345eb3d0906b19..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-6B-slerp/8fedde0a-96fe-4a6f-9e0f-87832cfd418e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-6B-slerp/1762652579.731526",
- "retrieved_timestamp": "1762652579.7315269",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LilRg/PRYMMAL-6B-slerp",
- "developer": "LilRg",
- "inference_platform": "unknown",
- "id": "LilRg/PRYMMAL-6B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11533065599276586
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28676215692036117
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24580536912751677
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36975
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1107878989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.293
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V3/a656eacf-8134-446c-8417-e1c3c54fe941.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V3/a656eacf-8134-446c-8417-e1c3c54fe941.json
deleted file mode 100644
index 94d56548c9a95bcc83c4c782eb9c14abeabece54..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V3/a656eacf-8134-446c-8417-e1c3c54fe941.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V3/1762652579.731744",
- "retrieved_timestamp": "1762652579.731745",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V3",
- "developer": "LilRg",
- "inference_platform": "unknown",
- "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12432346174816154
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2957239084980124
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.009818731117824773
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25671140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36714583333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11269946808510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V4/0d276bd3-a338-4383-88b0-9e653ae01387.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V4/0d276bd3-a338-4383-88b0-9e653ae01387.json
deleted file mode 100644
index 22d46842f1a77a691c1989406191575c0ceda57a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V4/0d276bd3-a338-4383-88b0-9e653ae01387.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V4/1762652579.731953",
- "retrieved_timestamp": "1762652579.7319539",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V4",
- "developer": "LilRg",
- "inference_platform": "unknown",
- "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12492298213185458
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2957239084980124
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.009818731117824773
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25671140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36714583333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11269946808510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V5/150d0730-e194-4d2b-96e1-54f914b5fe28.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V5/150d0730-e194-4d2b-96e1-54f914b5fe28.json
deleted file mode 100644
index 372e6d6b2e1b67e0829d0b8432c02510d7d6fc43..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V5/150d0730-e194-4d2b-96e1-54f914b5fe28.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V5/1762652579.7321632",
- "retrieved_timestamp": "1762652579.7321641",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V5",
- "developer": "LilRg",
- "inference_platform": "unknown",
- "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12492298213185458
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2957239084980124
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.009818731117824773
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25671140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36714583333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11269946808510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V6/b23913b9-f774-4927-be16-874d8e146218.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V6/b23913b9-f774-4927-be16-874d8e146218.json
deleted file mode 100644
index bb830035778e94c2c97a998f5be4f2432acfef52..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V6/b23913b9-f774-4927-be16-874d8e146218.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V6/1762652579.732379",
- "retrieved_timestamp": "1762652579.732379",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V6",
- "developer": "LilRg",
- "inference_platform": "unknown",
- "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V6"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12432346174816154
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2957239084980124
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.009818731117824773
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25671140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36714583333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11269946808510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V7/dd12d7df-9b32-4d2a-ae9a-40304cf4bfd7.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V7/dd12d7df-9b32-4d2a-ae9a-40304cf4bfd7.json
deleted file mode 100644
index 955a38e0f409372ddb41f93fb7e3b7f7338bb58a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-ECE-7B-SLERP-V7/dd12d7df-9b32-4d2a-ae9a-40304cf4bfd7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-ECE-7B-SLERP-V7/1762652579.732605",
- "retrieved_timestamp": "1762652579.732606",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LilRg/PRYMMAL-ECE-7B-SLERP-V7",
- "developer": "LilRg",
- "inference_platform": "unknown",
- "id": "LilRg/PRYMMAL-ECE-7B-SLERP-V7"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12492298213185458
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2957239084980124
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.009818731117824773
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25671140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36714583333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11269946808510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-slerp-Merge/9574abe0-00e3-4e38-bda0-b217f002a480.json b/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-slerp-Merge/9574abe0-00e3-4e38-bda0-b217f002a480.json
deleted file mode 100644
index 5c0530fcadfa8ec9cec51ab5f4312454e5e02d9e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LilRg/LilRg_PRYMMAL-slerp-Merge/9574abe0-00e3-4e38-bda0-b217f002a480.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LilRg_PRYMMAL-slerp-Merge/1762652579.732816",
- "retrieved_timestamp": "1762652579.732817",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LilRg/PRYMMAL-slerp-Merge",
- "developer": "LilRg",
- "inference_platform": "unknown",
- "id": "LilRg/PRYMMAL-slerp-Merge"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.304400102838247
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5364156271768925
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16163141993957703
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46347916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3863031914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 3.821
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/LimYeri/LimYeri_CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/d020a655-1cc0-49e9-9db1-f8b871babd5c.json b/leaderboard_data/HFOpenLLMv2/LimYeri/LimYeri_CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/d020a655-1cc0-49e9-9db1-f8b871babd5c.json
deleted file mode 100644
index 750e9ad2171dbaec5f1e35e91891d1bb938057a7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/LimYeri/LimYeri_CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/d020a655-1cc0-49e9-9db1-f8b871babd5c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LimYeri_CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged/1762652579.733827",
- "retrieved_timestamp": "1762652579.733829",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged",
- "developer": "LimYeri",
- "inference_platform": "unknown",
- "id": "LimYeri/CodeMind-Llama3-8B-unsloth_v4-one-DPO-merged"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6492406813920397
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48526582322240047
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3607916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3353557180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Locutusque/Locutusque_CollectiveLM-Falcon-3-7B/44737b7e-4942-4496-a818-fddce66da4d6.json b/leaderboard_data/HFOpenLLMv2/Locutusque/Locutusque_CollectiveLM-Falcon-3-7B/44737b7e-4942-4496-a818-fddce66da4d6.json
deleted file mode 100644
index 3167c7e62df2b9d8a6792587c8309509455390c4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Locutusque/Locutusque_CollectiveLM-Falcon-3-7B/44737b7e-4942-4496-a818-fddce66da4d6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Locutusque_CollectiveLM-Falcon-3-7B/1762652579.734693",
- "retrieved_timestamp": "1762652579.734694",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Locutusque/CollectiveLM-Falcon-3-7B",
- "developer": "Locutusque",
- "inference_platform": "unknown",
- "id": "Locutusque/CollectiveLM-Falcon-3-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3918281271470808
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5105131374222629
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21827794561933533
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32550335570469796
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3887291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35987367021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 7.456
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Luni/Luni_StarDust-12b-v1/fa64b745-6b4b-4fee-b77e-d744e54a17d6.json b/leaderboard_data/HFOpenLLMv2/Luni/Luni_StarDust-12b-v1/fa64b745-6b4b-4fee-b77e-d744e54a17d6.json
deleted file mode 100644
index 2b59cf1b7dd63f464e2ccaa6ef00448597ac8ff3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Luni/Luni_StarDust-12b-v1/fa64b745-6b4b-4fee-b77e-d744e54a17d6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Luni_StarDust-12b-v1/1762652579.736537",
- "retrieved_timestamp": "1762652579.7365382",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Luni/StarDust-12b-v1",
- "developer": "Luni",
- "inference_platform": "unknown",
- "id": "Luni/StarDust-12b-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5459259210007226
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5366139363101082
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07628398791540786
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43244791666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34117353723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Luni/Luni_StarDust-12b-v2/401f6afc-9a2a-4bfe-87b2-daa6df848424.json b/leaderboard_data/HFOpenLLMv2/Luni/Luni_StarDust-12b-v2/401f6afc-9a2a-4bfe-87b2-daa6df848424.json
deleted file mode 100644
index 564236d26296c752e791ca52fc0078fbbe9d0b6a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Luni/Luni_StarDust-12b-v2/401f6afc-9a2a-4bfe-87b2-daa6df848424.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Luni_StarDust-12b-v2/1762652579.736784",
- "retrieved_timestamp": "1762652579.736785",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Luni/StarDust-12b-v2",
- "developer": "Luni",
- "inference_platform": "unknown",
- "id": "Luni/StarDust-12b-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5628620947973599
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5419479534912178
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06873111782477341
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4338125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3439162234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lyte/Lyte_Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/8fdc62c0-215c-4502-8f56-188455fe2d9e.json b/leaderboard_data/HFOpenLLMv2/Lyte/Lyte_Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/8fdc62c0-215c-4502-8f56-188455fe2d9e.json
deleted file mode 100644
index 7b5062a2fdaf6082fe45f9ca4d3d78919f21d905..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lyte/Lyte_Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/8fdc62c0-215c-4502-8f56-188455fe2d9e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lyte_Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3/1762652579.74142",
- "retrieved_timestamp": "1762652579.74142",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3",
- "developer": "Lyte",
- "inference_platform": "unknown",
- "id": "Lyte/Llama-3.1-8B-Instruct-Reasoner-1o1_v0.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7098155117310957
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4949521619329585
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1903323262839879
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.346125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36178523936170215
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Lyte/Lyte_Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/ea928079-f00f-41b1-a628-c1539b41e63d.json b/leaderboard_data/HFOpenLLMv2/Lyte/Lyte_Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/ea928079-f00f-41b1-a628-c1539b41e63d.json
deleted file mode 100644
index 6e299d24d1173edf21b96a4ff94c011b91e12b28..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Lyte/Lyte_Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/ea928079-f00f-41b1-a628-c1539b41e63d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lyte_Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04/1762652579.7416818",
- "retrieved_timestamp": "1762652579.741683",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04",
- "developer": "Lyte",
- "inference_platform": "unknown",
- "id": "Lyte/Llama-3.2-1B-Instruct-COT-RL-Expriement1-EP04"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5773503193748144
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3515036874279285
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08006042296072508
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32355208333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18425864361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.236
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MEscriva/MEscriva_ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/ab59c1cb-ac90-4fe1-b782-2e038734366e.json b/leaderboard_data/HFOpenLLMv2/MEscriva/MEscriva_ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/ab59c1cb-ac90-4fe1-b782-2e038734366e.json
deleted file mode 100644
index 40c11e18c05d6a32592a1986e2a1983676c9fe7f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MEscriva/MEscriva_ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/ab59c1cb-ac90-4fe1-b782-2e038734366e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MEscriva_ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis/1762652579.7424488",
- "retrieved_timestamp": "1762652579.7424488",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis",
- "developer": "MEscriva",
- "inference_platform": "unknown",
- "id": "MEscriva/ECE-PRYMMAL-0.5B-FT-V5-MUSR-Mathis"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08662903318749807
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.305728612437881
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.010574018126888216
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2516778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40171874999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11544215425531915
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "?",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MTSAIR/MTSAIR_Cotype-Nano/b5fa19ff-9b05-4d71-9d79-54f8dfe4a8ab.json b/leaderboard_data/HFOpenLLMv2/MTSAIR/MTSAIR_Cotype-Nano/b5fa19ff-9b05-4d71-9d79-54f8dfe4a8ab.json
deleted file mode 100644
index bc85947ba96ba014f9b496eecbf2e3950e2437ad..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MTSAIR/MTSAIR_Cotype-Nano/b5fa19ff-9b05-4d71-9d79-54f8dfe4a8ab.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MTSAIR_Cotype-Nano/1762652579.742943",
- "retrieved_timestamp": "1762652579.742944",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MTSAIR/Cotype-Nano",
- "developer": "MTSAIR",
- "inference_platform": "unknown",
- "id": "MTSAIR/Cotype-Nano"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3747922179816221
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3864940969601492
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09743202416918428
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3289166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24767287234042554
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MTSAIR/MTSAIR_MultiVerse_70B/a713dba7-110a-40a0-9d89-d48567d423af.json b/leaderboard_data/HFOpenLLMv2/MTSAIR/MTSAIR_MultiVerse_70B/a713dba7-110a-40a0-9d89-d48567d423af.json
deleted file mode 100644
index 2b9060f16e5b03779bb7d01087afb8a055e98355..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MTSAIR/MTSAIR_MultiVerse_70B/a713dba7-110a-40a0-9d89-d48567d423af.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MTSAIR_MultiVerse_70B/1762652579.743202",
- "retrieved_timestamp": "1762652579.7432032",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MTSAIR/MultiVerse_70B",
- "developer": "MTSAIR",
- "inference_platform": "unknown",
- "id": "MTSAIR/MultiVerse_70B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5249183278146429
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6183134284931178
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19259818731117825
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3540268456375839
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47398958333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48603723404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 72.289
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.1/f3024d7f-f25f-4220-973a-b0e19ecb5e1d.json b/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.1/f3024d7f-f25f-4220-973a-b0e19ecb5e1d.json
deleted file mode 100644
index 65c689416cec4a0fda147a56f0920141366ea969..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.1/f3024d7f-f25f-4220-973a-b0e19ecb5e1d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.1/1762652579.743415",
- "retrieved_timestamp": "1762652579.743416",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1",
- "developer": "Magpie-Align",
- "inference_platform": "unknown",
- "id": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4361416596851908
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4615102744527366
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05740181268882175
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32773958333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2863198138297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.3/4756be0b-fd98-467f-a256-73aabba09c97.json b/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.3/4756be0b-fd98-467f-a256-73aabba09c97.json
deleted file mode 100644
index b42aec033b4a4891eb10395a9e396e1bd20bcc04..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.3/4756be0b-fd98-467f-a256-73aabba09c97.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3-8B-Magpie-Align-SFT-v0.3/1762652579.743664",
- "retrieved_timestamp": "1762652579.743665",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3",
- "developer": "Magpie-Align",
- "inference_platform": "unknown",
- "id": "Magpie-Align/Llama-3-8B-Magpie-Align-SFT-v0.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5063586838477463
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45715808996720547
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07326283987915408
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34237500000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902260638297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_Llama-3.1-8B-Magpie-Align-SFT-v0.1/43d2e788-e186-485d-8c34-10bdfd7a6b65.json b/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_Llama-3.1-8B-Magpie-Align-SFT-v0.1/43d2e788-e186-485d-8c34-10bdfd7a6b65.json
deleted file mode 100644
index b691d52b934afdd5ef7f2dfba4005c35864b878e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_Llama-3.1-8B-Magpie-Align-SFT-v0.1/43d2e788-e186-485d-8c34-10bdfd7a6b65.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Magpie-Align_Llama-3.1-8B-Magpie-Align-SFT-v0.1/1762652579.744527",
- "retrieved_timestamp": "1762652579.744527",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1",
- "developer": "Magpie-Align",
- "inference_platform": "unknown",
- "id": "Magpie-Align/Llama-3.1-8B-Magpie-Align-SFT-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47820671374176077
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4764157817799906
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08987915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3397395833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29429853723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_MagpieLM-8B-Chat-v0.1/b14fcc84-7caf-4aa8-b728-8a1287a5c04a.json b/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_MagpieLM-8B-Chat-v0.1/b14fcc84-7caf-4aa8-b728-8a1287a5c04a.json
deleted file mode 100644
index 9d2d80a8abaad71725a6619ba655b2ea75ef09f5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_MagpieLM-8B-Chat-v0.1/b14fcc84-7caf-4aa8-b728-8a1287a5c04a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Magpie-Align_MagpieLM-8B-Chat-v0.1/1762652579.744951",
- "retrieved_timestamp": "1762652579.744951",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Magpie-Align/MagpieLM-8B-Chat-v0.1",
- "developer": "Magpie-Align",
- "inference_platform": "unknown",
- "id": "Magpie-Align/MagpieLM-8B-Chat-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3700714105240761
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4172338260055306
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06117824773413897
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3500625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3194813829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_MagpieLM-8B-SFT-v0.1/eb307f58-db7e-44b3-bf03-7264a39bed69.json b/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_MagpieLM-8B-SFT-v0.1/eb307f58-db7e-44b3-bf03-7264a39bed69.json
deleted file mode 100644
index f831d8c477c6a2aecc09af0ac27b27c7da1aeabc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Magpie-Align/Magpie-Align_MagpieLM-8B-SFT-v0.1/eb307f58-db7e-44b3-bf03-7264a39bed69.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Magpie-Align_MagpieLM-8B-SFT-v0.1/1762652579.7451751",
- "retrieved_timestamp": "1762652579.7451751",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Magpie-Align/MagpieLM-8B-SFT-v0.1",
- "developer": "Magpie-Align",
- "inference_platform": "unknown",
- "id": "Magpie-Align/MagpieLM-8B-SFT-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4720619068515982
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45528501595553356
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0755287009063444
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3648854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2989527925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ManoloPueblo/ManoloPueblo_ContentCuisine_1-7B-slerp/74d2724e-9d5d-4142-9cff-3fd40c931882.json b/leaderboard_data/HFOpenLLMv2/ManoloPueblo/ManoloPueblo_ContentCuisine_1-7B-slerp/74d2724e-9d5d-4142-9cff-3fd40c931882.json
deleted file mode 100644
index ef26313d3830f93a8dd4af921d71dedc4dc00d07..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ManoloPueblo/ManoloPueblo_ContentCuisine_1-7B-slerp/74d2724e-9d5d-4142-9cff-3fd40c931882.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ManoloPueblo_ContentCuisine_1-7B-slerp/1762652579.745631",
- "retrieved_timestamp": "1762652579.745632",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ManoloPueblo/ContentCuisine_1-7B-slerp",
- "developer": "ManoloPueblo",
- "inference_platform": "unknown",
- "id": "ManoloPueblo/ContentCuisine_1-7B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3907044419916932
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5188437309746964
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07326283987915408
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46719791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30535239361702127
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ManoloPueblo/ManoloPueblo_LLM_MERGE_CC2/f7ca7fb6-b02c-4c27-afef-662bb62cd054.json b/leaderboard_data/HFOpenLLMv2/ManoloPueblo/ManoloPueblo_LLM_MERGE_CC2/f7ca7fb6-b02c-4c27-afef-662bb62cd054.json
deleted file mode 100644
index 6be3c15dd2a94432fa3b899bccf0f6ce3a2a9a84..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ManoloPueblo/ManoloPueblo_LLM_MERGE_CC2/f7ca7fb6-b02c-4c27-afef-662bb62cd054.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ManoloPueblo_LLM_MERGE_CC2/1762652579.745891",
- "retrieved_timestamp": "1762652579.745892",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ManoloPueblo/LLM_MERGE_CC2",
- "developer": "ManoloPueblo",
- "inference_platform": "unknown",
- "id": "ManoloPueblo/LLM_MERGE_CC2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3853087585384557
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5209367401710429
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06419939577039276
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45929166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30319148936170215
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ManoloPueblo/ManoloPueblo_LLM_MERGE_CC3/1c3dfe6a-28e7-4125-a802-1898336b1beb.json b/leaderboard_data/HFOpenLLMv2/ManoloPueblo/ManoloPueblo_LLM_MERGE_CC3/1c3dfe6a-28e7-4125-a802-1898336b1beb.json
deleted file mode 100644
index 992b2cf6f355bbe67f00c3ec23794f0e7b5294fe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ManoloPueblo/ManoloPueblo_LLM_MERGE_CC3/1c3dfe6a-28e7-4125-a802-1898336b1beb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ManoloPueblo_LLM_MERGE_CC3/1762652579.7460978",
- "retrieved_timestamp": "1762652579.746099",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ManoloPueblo/LLM_MERGE_CC3",
- "developer": "ManoloPueblo",
- "inference_platform": "unknown",
- "id": "ManoloPueblo/LLM_MERGE_CC3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3958751667797001
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5246290546274339
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07930513595166164
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4671666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3155751329787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MarinaraSpaghetti/MarinaraSpaghetti_NemoReRemix-12B/ac67a9d9-0f5a-4891-a9e5-2a924fbf4f72.json b/leaderboard_data/HFOpenLLMv2/MarinaraSpaghetti/MarinaraSpaghetti_NemoReRemix-12B/ac67a9d9-0f5a-4891-a9e5-2a924fbf4f72.json
deleted file mode 100644
index e2e4d79ed00372b7f80aba761fefe86673edf678..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MarinaraSpaghetti/MarinaraSpaghetti_NemoReRemix-12B/ac67a9d9-0f5a-4891-a9e5-2a924fbf4f72.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MarinaraSpaghetti_NemoReRemix-12B/1762652579.7463942",
- "retrieved_timestamp": "1762652579.746399",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MarinaraSpaghetti/NemoReRemix-12B",
- "developer": "MarinaraSpaghetti",
- "inference_platform": "unknown",
- "id": "MarinaraSpaghetti/NemoReRemix-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33425089872649016
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5536511805668158
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09063444108761329
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179530201342282
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4501458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3597905585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MarinaraSpaghetti/MarinaraSpaghetti_Nemomix-v4.0-12B/aeac3ed0-e93b-4fb2-bdd5-1fd06ccd3338.json b/leaderboard_data/HFOpenLLMv2/MarinaraSpaghetti/MarinaraSpaghetti_Nemomix-v4.0-12B/aeac3ed0-e93b-4fb2-bdd5-1fd06ccd3338.json
deleted file mode 100644
index e8a9387e0a3dababb43372b7112ba3a959058f5c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MarinaraSpaghetti/MarinaraSpaghetti_Nemomix-v4.0-12B/aeac3ed0-e93b-4fb2-bdd5-1fd06ccd3338.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MarinaraSpaghetti_Nemomix-v4.0-12B/1762652579.746819",
- "retrieved_timestamp": "1762652579.7468212",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MarinaraSpaghetti/Nemomix-v4.0-12B",
- "developer": "MarinaraSpaghetti",
- "inference_platform": "unknown",
- "id": "MarinaraSpaghetti/Nemomix-v4.0-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5574664113441224
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5274986611124783
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10800604229607251
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42444791666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36128656914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/2c99d2a7-7a5f-4357-ad92-745d8a718ee3.json b/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/2c99d2a7-7a5f-4357-ad92-745d8a718ee3.json
deleted file mode 100644
index 6838cf4e514f27f7c798aa3d401e7d599a9b87b4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/2c99d2a7-7a5f-4357-ad92-745d8a718ee3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Marsouuu_MiniMathExpert-2_61B-ECE-PRYMMAL-Martial/1762652579.747071",
- "retrieved_timestamp": "1762652579.747073",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial",
- "developer": "Marsouuu",
- "inference_platform": "unknown",
- "id": "Marsouuu/MiniMathExpert-2_61B-ECE-PRYMMAL-Martial"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25484159807089635
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3952730330493959
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07401812688821752
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40832291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22739361702127658
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_general3B-ECE-PRYMMAL-Martial/6f36320a-dcfd-4e93-87b2-53763dde5c57.json b/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_general3B-ECE-PRYMMAL-Martial/6f36320a-dcfd-4e93-87b2-53763dde5c57.json
deleted file mode 100644
index af099c17c4133387a688e86d398546b6026cb99f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_general3B-ECE-PRYMMAL-Martial/6f36320a-dcfd-4e93-87b2-53763dde5c57.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Marsouuu_general3B-ECE-PRYMMAL-Martial/1762652579.748109",
- "retrieved_timestamp": "1762652579.74811",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Marsouuu/general3B-ECE-PRYMMAL-Martial",
- "developer": "Marsouuu",
- "inference_platform": "unknown",
- "id": "Marsouuu/general3B-ECE-PRYMMAL-Martial"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27222658102722996
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5394350977017502
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15483383685800603
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4700520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38763297872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 3.821
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_general3Bv2-ECE-PRYMMAL-Martial/716552b2-6343-4339-b9f5-a573fa47c384.json b/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_general3Bv2-ECE-PRYMMAL-Martial/716552b2-6343-4339-b9f5-a573fa47c384.json
deleted file mode 100644
index 95475411b4e3d91799e86b89991a8eeaf09958a4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_general3Bv2-ECE-PRYMMAL-Martial/716552b2-6343-4339-b9f5-a573fa47c384.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Marsouuu_general3Bv2-ECE-PRYMMAL-Martial/1762652579.748472",
- "retrieved_timestamp": "1762652579.7484732",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Marsouuu/general3Bv2-ECE-PRYMMAL-Martial",
- "developer": "Marsouuu",
- "inference_platform": "unknown",
- "id": "Marsouuu/general3Bv2-ECE-PRYMMAL-Martial"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5692817280371636
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5636569831901026
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36706948640483383
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43960416666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4498005319148936
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_lareneg1_78B-ECE-PRYMMAL-Martial/49532386-7e9b-4719-9c24-5d463dea6cfc.json b/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_lareneg1_78B-ECE-PRYMMAL-Martial/49532386-7e9b-4719-9c24-5d463dea6cfc.json
deleted file mode 100644
index 3891ec167dc1fe7815f04025365c8180b1a7a099..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_lareneg1_78B-ECE-PRYMMAL-Martial/49532386-7e9b-4719-9c24-5d463dea6cfc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Marsouuu_lareneg1_78B-ECE-PRYMMAL-Martial/1762652579.7487411",
- "retrieved_timestamp": "1762652579.7487419",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial",
- "developer": "Marsouuu",
- "inference_platform": "unknown",
- "id": "Marsouuu/lareneg1_78B-ECE-PRYMMAL-Martial"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2794961812435449
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42301343044108936
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11404833836858005
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28187919463087246
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38673958333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2922207446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_lareneg3B-ECE-PRYMMAL-Martial/8d0e995d-2859-461b-8be7-60d2b2690d6b.json b/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_lareneg3B-ECE-PRYMMAL-Martial/8d0e995d-2859-461b-8be7-60d2b2690d6b.json
deleted file mode 100644
index e6da8c755742528289d36656912be0d8687a6ac6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_lareneg3B-ECE-PRYMMAL-Martial/8d0e995d-2859-461b-8be7-60d2b2690d6b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Marsouuu_lareneg3B-ECE-PRYMMAL-Martial/1762652579.748992",
- "retrieved_timestamp": "1762652579.748993",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Marsouuu/lareneg3B-ECE-PRYMMAL-Martial",
- "developer": "Marsouuu",
- "inference_platform": "unknown",
- "id": "Marsouuu/lareneg3B-ECE-PRYMMAL-Martial"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33032908239028
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5453325807578268
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15181268882175228
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47246875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37666223404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 3.821
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_lareneg3Bv2-ECE-PRYMMAL-Martial/09b5771f-9ee2-4f4f-9fa9-e0280c33b00f.json b/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_lareneg3Bv2-ECE-PRYMMAL-Martial/09b5771f-9ee2-4f4f-9fa9-e0280c33b00f.json
deleted file mode 100644
index d702a851f79413887d3c8315da90da70e489d6f7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Marsouuu/Marsouuu_lareneg3Bv2-ECE-PRYMMAL-Martial/09b5771f-9ee2-4f4f-9fa9-e0280c33b00f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Marsouuu_lareneg3Bv2-ECE-PRYMMAL-Martial/1762652579.749232",
- "retrieved_timestamp": "1762652579.749232",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial",
- "developer": "Marsouuu",
- "inference_platform": "unknown",
- "id": "Marsouuu/lareneg3Bv2-ECE-PRYMMAL-Martial"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5753267995585047
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.562336014537904
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36555891238670696
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4369375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45113031914893614
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Calme-4x7B-MoE-v0.1/f4512664-c531-4b13-b76e-e96c2b03febf.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Calme-4x7B-MoE-v0.1/f4512664-c531-4b13-b76e-e96c2b03febf.json
deleted file mode 100644
index a5fdd8645ca9cae2615262306aed7d606b334ade..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Calme-4x7B-MoE-v0.1/f4512664-c531-4b13-b76e-e96c2b03febf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Calme-4x7B-MoE-v0.1/1762652579.7495291",
- "retrieved_timestamp": "1762652579.74953",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/Calme-4x7B-MoE-v0.1",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/Calme-4x7B-MoE-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4315205875964663
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5102819889174134
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08006042296072508
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28187919463087246
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4198854166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3056848404255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 24.154
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Calme-4x7B-MoE-v0.2/ca2df1c9-79b2-453b-9cd1-b607e48f5dd7.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Calme-4x7B-MoE-v0.2/ca2df1c9-79b2-453b-9cd1-b607e48f5dd7.json
deleted file mode 100644
index ffcbc703f75147c878e8c46545e1b84ddd92660c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Calme-4x7B-MoE-v0.2/ca2df1c9-79b2-453b-9cd1-b607e48f5dd7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Calme-4x7B-MoE-v0.2/1762652579.7498329",
- "retrieved_timestamp": "1762652579.749834",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/Calme-4x7B-MoE-v0.2",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/Calme-4x7B-MoE-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.429447200095746
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5110766802558263
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07401812688821752
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43176041666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30576795212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 24.154
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-70B-Instruct-v0.1/1e2759fa-3e87-447b-b0ca-5a4e2e293589.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-70B-Instruct-v0.1/1e2759fa-3e87-447b-b0ca-5a4e2e293589.json
deleted file mode 100644
index a88c159124925999bb1fe41f5456871cd14fb426..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-70B-Instruct-v0.1/1e2759fa-3e87-447b-b0ca-5a4e2e293589.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-70B-Instruct-v0.1/1762652579.750048",
- "retrieved_timestamp": "1762652579.750049",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/Llama-3-70B-Instruct-v0.1",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/Llama-3-70B-Instruct-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47143800671108216
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5366257615951637
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18051359516616314
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4433020833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4617686170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-8B-Instruct-v0.10/19143059-07d5-44b2-b599-193147f6196a.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-8B-Instruct-v0.10/19143059-07d5-44b2-b599-193147f6196a.json
deleted file mode 100644
index 8534dc3444d99b784231fd0a14bd10a55be408cb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-8B-Instruct-v0.10/19143059-07d5-44b2-b599-193147f6196a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-8B-Instruct-v0.10/1762652579.750272",
- "retrieved_timestamp": "1762652579.750272",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/Llama-3-8B-Instruct-v0.10",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.10"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7667433520835827
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4924311866686311
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05740181268882175
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42143749999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38622007978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-8B-Instruct-v0.8/c68859dd-6db0-4bdc-a031-92ac7d1d2585.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-8B-Instruct-v0.8/c68859dd-6db0-4bdc-a031-92ac7d1d2585.json
deleted file mode 100644
index 4097bb53d9764b4b518eac80a79a5e8d909f3bbe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-8B-Instruct-v0.8/c68859dd-6db0-4bdc-a031-92ac7d1d2585.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-8B-Instruct-v0.8/1762652579.750486",
- "retrieved_timestamp": "1762652579.750487",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/Llama-3-8B-Instruct-v0.8",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.8"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7527549125209998
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49627836815949883
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07779456193353475
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42019791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3853058510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-8B-Instruct-v0.9/1fb0056b-4f66-404b-89ac-a58185747ce2.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-8B-Instruct-v0.9/1fb0056b-4f66-404b-89ac-a58185747ce2.json
deleted file mode 100644
index 1e9bcef47ef8487928166ff9f584b37eff34814e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Llama-3-8B-Instruct-v0.9/1fb0056b-4f66-404b-89ac-a58185747ce2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Llama-3-8B-Instruct-v0.9/1762652579.750697",
- "retrieved_timestamp": "1762652579.750697",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/Llama-3-8B-Instruct-v0.9",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/Llama-3-8B-Instruct-v0.9"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.763046494412603
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4936132794870085
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07326283987915408
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4148020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3845578457446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Qwen2-7B-Instruct-v0.1/ce4ee4fe-8a38-467b-b189-b25311c23c4e.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Qwen2-7B-Instruct-v0.1/ce4ee4fe-8a38-467b-b189-b25311c23c4e.json
deleted file mode 100644
index c7d25bbd66e8aec907cad2ae270a403da34a075c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Qwen2-7B-Instruct-v0.1/ce4ee4fe-8a38-467b-b189-b25311c23c4e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Qwen2-7B-Instruct-v0.1/1762652579.7511811",
- "retrieved_timestamp": "1762652579.751182",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/Qwen2-7B-Instruct-v0.1",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/Qwen2-7B-Instruct-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33522498082864577
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5123061019250074
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2212990936555891
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44347916666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3857214095744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Qwen2-7B-Instruct-v0.8/a65af628-f518-4da7-afc5-7cba4234415b.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Qwen2-7B-Instruct-v0.8/a65af628-f518-4da7-afc5-7cba4234415b.json
deleted file mode 100644
index 81e179870d3b94925e992488b74a60aeb204f973..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_Qwen2-7B-Instruct-v0.8/a65af628-f518-4da7-afc5-7cba4234415b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Qwen2-7B-Instruct-v0.8/1762652579.751401",
- "retrieved_timestamp": "1762652579.751402",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/Qwen2-7B-Instruct-v0.8",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/Qwen2-7B-Instruct-v0.8"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27747266142723526
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4637108491317945
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17673716012084592
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4293125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3566323138297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.1-rys-78b/387000a4-7ef5-46c6-9b5e-9bfe7c2cfc18.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.1-rys-78b/387000a4-7ef5-46c6-9b5e-9bfe7c2cfc18.json
deleted file mode 100644
index be46ff654f0292e9853001628eaee59b6d8ac440..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.1-rys-78b/387000a4-7ef5-46c6-9b5e-9bfe7c2cfc18.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-rys-78b/1762652579.752971",
- "retrieved_timestamp": "1762652579.752971",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-2.1-rys-78b",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-2.1-rys-78b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8135547015252862
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7097861139530462
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3942598187311178
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39429530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4693125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5443816489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 77.965
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.2-rys-78b/cfaafe4c-50a1-4cde-b092-fdbaeea86fb3.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.2-rys-78b/cfaafe4c-50a1-4cde-b092-fdbaeea86fb3.json
deleted file mode 100644
index 1c8b2e3e8f7a448db3b3da576167b85527ec01d7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.2-rys-78b/cfaafe4c-50a1-4cde-b092-fdbaeea86fb3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-rys-78b/1762652579.754511",
- "retrieved_timestamp": "1762652579.754511",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-2.2-rys-78b",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-2.2-rys-78b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7986420475449585
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7081014602379213
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4070996978851964
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40687919463087246
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45356250000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.538563829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 77.965
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.3-rys-78b/33a06134-e58d-4bc7-8421-c5ae2f0dcd1f.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.3-rys-78b/33a06134-e58d-4bc7-8421-c5ae2f0dcd1f.json
deleted file mode 100644
index 228b775cde56a3c079434a73d3e938e95fab6fc4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.3-rys-78b/33a06134-e58d-4bc7-8421-c5ae2f0dcd1f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-rys-78b/1762652579.7562392",
- "retrieved_timestamp": "1762652579.7562408",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-2.3-rys-78b",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-2.3-rys-78b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8065854155862002
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7107763314317289
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39803625377643503
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40436241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45492708333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5475398936170213
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 77.965
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.4-rys-78b/48433dc8-40ff-4e36-8c6a-ced33bc22e4f.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.4-rys-78b/48433dc8-40ff-4e36-8c6a-ced33bc22e4f.json
deleted file mode 100644
index 588ca075cc9aa74bce22641fc23cc9b67797189c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-2.4-rys-78b/48433dc8-40ff-4e36-8c6a-ced33bc22e4f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.4-rys-78b/1762652579.7570088",
- "retrieved_timestamp": "1762652579.75701",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-2.4-rys-78b",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-2.4-rys-78b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8010899967641414
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7279510956242796
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4070996978851964
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40268456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5770624999999999
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7002160904255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 77.965
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.1-baguette-3b/8f0a6518-d153-43ec-b426-02136a2bc367.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.1-baguette-3b/8f0a6518-d153-43ec-b426-02136a2bc367.json
deleted file mode 100644
index 18bba3292191c4525da433d03f27d41eb7d983a1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.1-baguette-3b/8f0a6518-d153-43ec-b426-02136a2bc367.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-baguette-3b/1762652579.7580318",
- "retrieved_timestamp": "1762652579.7580328",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-3.1-baguette-3b",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-3.1-baguette-3b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6234369251364158
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46833341042911075
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25604229607250756
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2860738255033557
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40079166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33992686170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.085
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.1-instruct-3b/67915bce-0b54-4996-90f6-cec6def9bbba.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.1-instruct-3b/67915bce-0b54-4996-90f6-cec6def9bbba.json
deleted file mode 100644
index f97759091b428982e074dfd1413c4cf15cf24ab8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.1-instruct-3b/67915bce-0b54-4996-90f6-cec6def9bbba.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-instruct-3b/1762652579.758249",
- "retrieved_timestamp": "1762652579.75825",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-3.1-instruct-3b",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-3.1-instruct-3b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43359397509718656
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4812730148043098
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17749244712990936
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2860738255033557
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39520833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.355718085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.085
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.1-instruct-78b/898e5e91-c4c0-4494-baad-37c2bfd1931b.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.1-instruct-78b/898e5e91-c4c0-4494-baad-37c2bfd1931b.json
deleted file mode 100644
index b70a6851d2921f4ceae94dd07318390d908b63f0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.1-instruct-78b/898e5e91-c4c0-4494-baad-37c2bfd1931b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.1-instruct-78b/1762652579.7584739",
- "retrieved_timestamp": "1762652579.758475",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-3.1-instruct-78b",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-3.1-instruct-78b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8135547015252862
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7305154498840408
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39274924471299094
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3959731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5890624999999999
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.718500664893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 77.965
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.2-baguette-3b/e49441f3-99a5-4cdb-bff1-79cc21711bab.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.2-baguette-3b/e49441f3-99a5-4cdb-bff1-79cc21711bab.json
deleted file mode 100644
index 4ef2faf8e803e1b74343d8ce3573f6cd861913d3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.2-baguette-3b/e49441f3-99a5-4cdb-bff1-79cc21711bab.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.2-baguette-3b/1762652579.75889",
- "retrieved_timestamp": "1762652579.758891",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-3.2-baguette-3b",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-3.2-baguette-3b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6338282423968404
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.470862269902714
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2824773413897281
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40209374999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3337765957446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.085
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.2-instruct-3b/83e46bac-5266-4f65-a4dd-76240b297adc.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.2-instruct-3b/83e46bac-5266-4f65-a4dd-76240b297adc.json
deleted file mode 100644
index 854147fe0623a41ff372b37a23df2299985d9b84..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.2-instruct-3b/83e46bac-5266-4f65-a4dd-76240b297adc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.2-instruct-3b/1762652579.759095",
- "retrieved_timestamp": "1762652579.7590961",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-3.2-instruct-3b",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-3.2-instruct-3b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5533196363426819
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4865641110376735
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21676737160120846
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40469791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36527593085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.2-instruct-78b/77cc280c-b794-4a9a-addc-e2eb0a1af896.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.2-instruct-78b/77cc280c-b794-4a9a-addc-e2eb0a1af896.json
deleted file mode 100644
index 7e7dc4d5a1a3fc3603f23c31b059e5c8f29e86fe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.2-instruct-78b/77cc280c-b794-4a9a-addc-e2eb0a1af896.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.2-instruct-78b/1762652579.759298",
- "retrieved_timestamp": "1762652579.759299",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-3.2-instruct-78b",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-3.2-instruct-78b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8062607215521482
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7318616272092674
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4033232628398791
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40268456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6023645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7303025265957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 77.965
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.3-baguette-3b/22cbbb6d-1014-42af-96cf-1636fcb40679.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.3-baguette-3b/22cbbb6d-1014-42af-96cf-1636fcb40679.json
deleted file mode 100644
index 7c43e9c6cdcc565cd67c578f86c8fdebf1d4feb4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.3-baguette-3b/22cbbb6d-1014-42af-96cf-1636fcb40679.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.3-baguette-3b/1762652579.759511",
- "retrieved_timestamp": "1762652579.759511",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-3.3-baguette-3b",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-3.3-baguette-3b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6359514975819713
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4678217295957521
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3806646525679758
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39282291666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3341921542553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.3-instruct-3b/8aa85bd2-eab2-491b-95a3-ac6321cbe298.json b/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.3-instruct-3b/8aa85bd2-eab2-491b-95a3-ac6321cbe298.json
deleted file mode 100644
index 355fd2dd9bcd51301f5d3b03a8eaaaf4f6b9a81c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MaziyarPanahi/MaziyarPanahi_calme-3.3-instruct-3b/8aa85bd2-eab2-491b-95a3-ac6321cbe298.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-3.3-instruct-3b/1762652579.759784",
- "retrieved_timestamp": "1762652579.759785",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-3.3-instruct-3b",
- "developer": "MaziyarPanahi",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-3.3-instruct-3b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6423212631373645
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46933409427688694
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37386706948640486
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40742708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33053523936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Minami-su/Minami-su_test-7B-00/ba9ead4a-3d47-4a51-bc39-dbf72d7ff3af.json b/leaderboard_data/HFOpenLLMv2/Minami-su/Minami-su_test-7B-00/ba9ead4a-3d47-4a51-bc39-dbf72d7ff3af.json
deleted file mode 100644
index 503a40553fd713520856be459002dd3bd82cfff1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Minami-su/Minami-su_test-7B-00/ba9ead4a-3d47-4a51-bc39-dbf72d7ff3af.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Minami-su_test-7B-00/1762652579.7606468",
- "retrieved_timestamp": "1762652579.76065",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Minami-su/test-7B-00",
- "developer": "Minami-su",
- "inference_platform": "unknown",
- "id": "Minami-su/test-7B-00"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6690492338107332
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44661237656101793
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4516616314199396
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41260416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3587932180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Minami-su/Minami-su_test-7B-01/2918f03e-3fd5-4183-be8d-2911e0204e8d.json b/leaderboard_data/HFOpenLLMv2/Minami-su/Minami-su_test-7B-01/2918f03e-3fd5-4183-be8d-2911e0204e8d.json
deleted file mode 100644
index 02f3dec17a62892bd8a949c63e048155e623fbbd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Minami-su/Minami-su_test-7B-01/2918f03e-3fd5-4183-be8d-2911e0204e8d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Minami-su_test-7B-01/1762652579.761029",
- "retrieved_timestamp": "1762652579.76103",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Minami-su/test-7B-01",
- "developer": "Minami-su",
- "inference_platform": "unknown",
- "id": "Minami-su/test-7B-01"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6736204382150472
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4422359420239754
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4554380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41530208333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35355718085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Minami-su/Minami-su_test-v2-7B-00/95abd2ea-1fb7-4ef8-b186-bfe67148e486.json b/leaderboard_data/HFOpenLLMv2/Minami-su/Minami-su_test-v2-7B-00/95abd2ea-1fb7-4ef8-b186-bfe67148e486.json
deleted file mode 100644
index 0d80d9c60a7dac62431482e3ee95bb4e76397f38..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Minami-su/Minami-su_test-v2-7B-00/95abd2ea-1fb7-4ef8-b186-bfe67148e486.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Minami-su_test-v2-7B-00/1762652579.76127",
- "retrieved_timestamp": "1762652579.761271",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Minami-su/test-v2-7B-00",
- "developer": "Minami-su",
- "inference_platform": "unknown",
- "id": "Minami-su/test-v2-7B-00"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6747197436136119
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4415989344595353
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4418429003021148
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41542708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3472406914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ModelCloud/ModelCloud_Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/4a68c55f-ac3d-4173-a1cc-8bb97a2b8466.json b/leaderboard_data/HFOpenLLMv2/ModelCloud/ModelCloud_Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/4a68c55f-ac3d-4173-a1cc-8bb97a2b8466.json
deleted file mode 100644
index 1880985b97db3ccbe230892afc0e57ac1ca8a8ae..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ModelCloud/ModelCloud_Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/4a68c55f-ac3d-4173-a1cc-8bb97a2b8466.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ModelCloud_Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1/1762652579.761516",
- "retrieved_timestamp": "1762652579.761517",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1",
- "developer": "ModelCloud",
- "inference_platform": "unknown",
- "id": "ModelCloud/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortex-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5268919799465418
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3252726665015006
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3249166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17644614361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 5.453
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Mostafa8Mehrabi/Mostafa8Mehrabi_llama-3.2-1b-Insomnia-ChatBot-merged/940d1360-047b-4c12-a7e5-cd002675c69c.json b/leaderboard_data/HFOpenLLMv2/Mostafa8Mehrabi/Mostafa8Mehrabi_llama-3.2-1b-Insomnia-ChatBot-merged/940d1360-047b-4c12-a7e5-cd002675c69c.json
deleted file mode 100644
index 53b1812768f4b8bf43cfbb50f778257401e1cdff..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Mostafa8Mehrabi/Mostafa8Mehrabi_llama-3.2-1b-Insomnia-ChatBot-merged/940d1360-047b-4c12-a7e5-cd002675c69c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Mostafa8Mehrabi_llama-3.2-1b-Insomnia-ChatBot-merged/1762652579.7624152",
- "retrieved_timestamp": "1762652579.7624161",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged",
- "developer": "Mostafa8Mehrabi",
- "inference_platform": "unknown",
- "id": "Mostafa8Mehrabi/llama-3.2-1b-Insomnia-ChatBot-merged"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13206735905176042
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3003508901818665
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0075528700906344415
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23657718120805368
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33815625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11311502659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.236
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MrRobotoAI/MrRobotoAI_MrRoboto-ProLong-8b-v4i/7c100a09-f34e-4bd7-b201-3779ee5a769d.json b/leaderboard_data/HFOpenLLMv2/MrRobotoAI/MrRobotoAI_MrRoboto-ProLong-8b-v4i/7c100a09-f34e-4bd7-b201-3779ee5a769d.json
deleted file mode 100644
index 4471be4e35932a05f4408a0af3a292556a102880..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MrRobotoAI/MrRobotoAI_MrRoboto-ProLong-8b-v4i/7c100a09-f34e-4bd7-b201-3779ee5a769d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MrRobotoAI_MrRoboto-ProLong-8b-v4i/1762652579.762677",
- "retrieved_timestamp": "1762652579.762678",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MrRobotoAI/MrRoboto-ProLong-8b-v4i",
- "developer": "MrRobotoAI",
- "inference_platform": "unknown",
- "id": "MrRobotoAI/MrRoboto-ProLong-8b-v4i"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3834603297029659
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.458548650453507
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.401375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3068484042553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 4.015
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MrRobotoAI/MrRobotoAI_MrRoboto-ProLongBASE-pt8-unaligned-8b/4c54b609-0af6-4116-b62f-1c8a4d68f06b.json b/leaderboard_data/HFOpenLLMv2/MrRobotoAI/MrRobotoAI_MrRoboto-ProLongBASE-pt8-unaligned-8b/4c54b609-0af6-4116-b62f-1c8a4d68f06b.json
deleted file mode 100644
index 387ae7f26d3c710f71dcfc10477ebbefbf9b6056..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MrRobotoAI/MrRobotoAI_MrRoboto-ProLongBASE-pt8-unaligned-8b/4c54b609-0af6-4116-b62f-1c8a4d68f06b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MrRobotoAI_MrRoboto-ProLongBASE-pt8-unaligned-8b/1762652579.762937",
- "retrieved_timestamp": "1762652579.762937",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b",
- "developer": "MrRobotoAI",
- "inference_platform": "unknown",
- "id": "MrRobotoAI/MrRoboto-ProLongBASE-pt8-unaligned-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34754008253655855
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4515254903058233
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04229607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42788541666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2565658244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 4.015
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1211-3B/2cc4a013-ff0c-44b0-b2e1-66e103606e12.json b/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1211-3B/2cc4a013-ff0c-44b0-b2e1-66e103606e12.json
deleted file mode 100644
index 01db929ded5e651a63eb5fe7a68ec3db8bf6c435..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1211-3B/2cc4a013-ff0c-44b0-b2e1-66e103606e12.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1211-3B/1762652579.763158",
- "retrieved_timestamp": "1762652579.763159",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MultivexAI/Gladiator-Mini-Exp-1211-3B",
- "developer": "MultivexAI",
- "inference_platform": "unknown",
- "id": "MultivexAI/Gladiator-Mini-Exp-1211-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.68760887777763
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44843752663028075
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13746223564954682
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.326
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3151595744680851
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct-V2/a152be8c-a542-4a73-8164-a43e1f04c595.json b/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct-V2/a152be8c-a542-4a73-8164-a43e1f04c595.json
deleted file mode 100644
index eb54c4301a0927832b0e0aa422f812d658367c73..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct-V2/a152be8c-a542-4a73-8164-a43e1f04c595.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct-V2/1762652579.763629",
- "retrieved_timestamp": "1762652579.7636302",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2",
- "developer": "MultivexAI",
- "inference_platform": "unknown",
- "id": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct-V2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6215386286165153
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.438883390990549
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14123867069486404
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30082291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3025265957446808
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct/ebfb99cd-9672-4c30-9540-46e4035a0d43.json b/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct/ebfb99cd-9672-4c30-9540-46e4035a0d43.json
deleted file mode 100644
index 992d58c1a69e68237d6029549ce2d3641241bba6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct/ebfb99cd-9672-4c30-9540-46e4035a0d43.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1221-3B-Instruct/1762652579.763424",
- "retrieved_timestamp": "1762652579.763425",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct",
- "developer": "MultivexAI",
- "inference_platform": "unknown",
- "id": "MultivexAI/Gladiator-Mini-Exp-1221-3B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6078748830879843
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4369766992416903
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1351963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31145833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3048537234042553
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1222-3B-Instruct/990d6877-4045-49ef-ae23-f5a6302185d6.json b/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1222-3B-Instruct/990d6877-4045-49ef-ae23-f5a6302185d6.json
deleted file mode 100644
index d19d47b6e76d3354a4832eb8345c841cf94b7e5f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Gladiator-Mini-Exp-1222-3B-Instruct/990d6877-4045-49ef-ae23-f5a6302185d6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MultivexAI_Gladiator-Mini-Exp-1222-3B-Instruct/1762652579.763836",
- "retrieved_timestamp": "1762652579.7638369",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct",
- "developer": "MultivexAI",
- "inference_platform": "unknown",
- "id": "MultivexAI/Gladiator-Mini-Exp-1222-3B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6163180361440976
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4373182371021645
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14123867069486404
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31276041666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30169547872340424
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/c14766b4-5339-4c6e-87d9-fc2bb953e176.json b/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/c14766b4-5339-4c6e-87d9-fc2bb953e176.json
deleted file mode 100644
index 7d4536459aa0c7626d40c2d340277868211fbabe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/MultivexAI/MultivexAI_Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/c14766b4-5339-4c6e-87d9-fc2bb953e176.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MultivexAI_Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF/1762652579.764051",
- "retrieved_timestamp": "1762652579.764052",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF",
- "developer": "MultivexAI",
- "inference_platform": "unknown",
- "id": "MultivexAI/Phi-3.5-Mini-Instruct-MultiVex-v0.25-GGUF"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14398241111362298
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29077474506950557
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.006042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2550335570469799
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3641979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11087101063829788
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.821
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-0.3B-Instruct-v1.1/0f9eeb32-85fb-4778-8618-436aa4f891ad.json b/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-0.3B-Instruct-v1.1/0f9eeb32-85fb-4778-8618-436aa4f891ad.json
deleted file mode 100644
index c685b50501f5adcd4811ab720df9c62a6c34071c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-0.3B-Instruct-v1.1/0f9eeb32-85fb-4778-8618-436aa4f891ad.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-0.3B-Instruct-v1.1/1762652579.764531",
- "retrieved_timestamp": "1762652579.764531",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Mxode/NanoLM-0.3B-Instruct-v1.1",
- "developer": "Mxode",
- "inference_platform": "unknown",
- "id": "Mxode/NanoLM-0.3B-Instruct-v1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17827918810977095
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3014403673764691
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.013595166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42733333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11211768617021277
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.315
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-0.3B-Instruct-v1/3c08189e-294e-4682-a7e0-e73a8d498fb2.json b/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-0.3B-Instruct-v1/3c08189e-294e-4682-a7e0-e73a8d498fb2.json
deleted file mode 100644
index 5aece453085b44583af97f8c82195039330843fc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-0.3B-Instruct-v1/3c08189e-294e-4682-a7e0-e73a8d498fb2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-0.3B-Instruct-v1/1762652579.764268",
- "retrieved_timestamp": "1762652579.764269",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Mxode/NanoLM-0.3B-Instruct-v1",
- "developer": "Mxode",
- "inference_platform": "unknown",
- "id": "Mxode/NanoLM-0.3B-Instruct-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1536744726215331
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30282462164767127
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.014350453172205438
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41552083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11053856382978723
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.315
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-0.3B-Instruct-v2/43ce0bee-e8ee-417d-be0d-841d6e26b330.json b/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-0.3B-Instruct-v2/43ce0bee-e8ee-417d-be0d-841d6e26b330.json
deleted file mode 100644
index 49297ded9c9ee1e6ed2f6d4d4a49aed4cc3ecf7b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-0.3B-Instruct-v2/43ce0bee-e8ee-417d-be0d-841d6e26b330.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-0.3B-Instruct-v2/1762652579.7647529",
- "retrieved_timestamp": "1762652579.7647538",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Mxode/NanoLM-0.3B-Instruct-v2",
- "developer": "Mxode",
- "inference_platform": "unknown",
- "id": "Mxode/NanoLM-0.3B-Instruct-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1667885654507817
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29211039456850646
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.006797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3954583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11344747340425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.315
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-1B-Instruct-v1.1/2e482de2-60ca-4758-9de8-4482e42a5b7a.json b/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-1B-Instruct-v1.1/2e482de2-60ca-4758-9de8-4482e42a5b7a.json
deleted file mode 100644
index 1153a7070ddbcaa7eaa0bc5b94f2311fcb55242b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-1B-Instruct-v1.1/2e482de2-60ca-4758-9de8-4482e42a5b7a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-1B-Instruct-v1.1/1762652579.764964",
- "retrieved_timestamp": "1762652579.764964",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Mxode/NanoLM-1B-Instruct-v1.1",
- "developer": "Mxode",
- "inference_platform": "unknown",
- "id": "Mxode/NanoLM-1B-Instruct-v1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23952889444451833
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31835012059590373
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03625377643504532
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34327083333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12150930851063829
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.076
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-1B-Instruct-v2/d7d1e48d-86af-4f65-803b-30fff69c78b5.json b/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-1B-Instruct-v2/d7d1e48d-86af-4f65-803b-30fff69c78b5.json
deleted file mode 100644
index f3f4936d8fda1a17604acee010a0bd8832317f9f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Mxode/Mxode_NanoLM-1B-Instruct-v2/d7d1e48d-86af-4f65-803b-30fff69c78b5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Mxode_NanoLM-1B-Instruct-v2/1762652579.765177",
- "retrieved_timestamp": "1762652579.7651782",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Mxode/NanoLM-1B-Instruct-v2",
- "developer": "Mxode",
- "inference_platform": "unknown",
- "id": "Mxode/NanoLM-1B-Instruct-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2629844368497808
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3123145400715591
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04154078549848943
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35520833333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12375332446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.076
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NAPS-ai/NAPS-ai_naps-llama-3_1-8b-instruct-v0.3/d0ce5c14-28fa-4fde-901e-6670db6943de.json b/leaderboard_data/HFOpenLLMv2/NAPS-ai/NAPS-ai_naps-llama-3_1-8b-instruct-v0.3/d0ce5c14-28fa-4fde-901e-6670db6943de.json
deleted file mode 100644
index 004ae590c98c745fd1222fb20c7c4eac34120334..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NAPS-ai/NAPS-ai_naps-llama-3_1-8b-instruct-v0.3/d0ce5c14-28fa-4fde-901e-6670db6943de.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1-8b-instruct-v0.3/1762652579.765912",
- "retrieved_timestamp": "1762652579.765913",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.3",
- "developer": "NAPS-ai",
- "inference_platform": "unknown",
- "id": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5390818583580456
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4900525115527062
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1903323262839879
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37870833333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33984375
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NAPS-ai/NAPS-ai_naps-llama-3_1-8b-instruct-v0.4/467a9428-e85d-489d-be59-91842b389732.json b/leaderboard_data/HFOpenLLMv2/NAPS-ai/NAPS-ai_naps-llama-3_1-8b-instruct-v0.4/467a9428-e85d-489d-be59-91842b389732.json
deleted file mode 100644
index d1e3b9ade89634861745a66a07f61ff8028bc3ed..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NAPS-ai/NAPS-ai_naps-llama-3_1-8b-instruct-v0.4/467a9428-e85d-489d-be59-91842b389732.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1-8b-instruct-v0.4/1762652579.766172",
- "retrieved_timestamp": "1762652579.766173",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.4",
- "developer": "NAPS-ai",
- "inference_platform": "unknown",
- "id": "NAPS-ai/naps-llama-3_1-8b-instruct-v0.4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7344202272193336
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4861833360906734
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19637462235649547
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4421145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3474900265957447
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NAPS-ai/NAPS-ai_naps-llama-3_1-instruct-v0.5.0/5553fa1d-6bf9-469d-b870-590dd4965209.json b/leaderboard_data/HFOpenLLMv2/NAPS-ai/NAPS-ai_naps-llama-3_1-instruct-v0.5.0/5553fa1d-6bf9-469d-b870-590dd4965209.json
deleted file mode 100644
index 8cca74cdf60ab6744e035b12808fd36ecd98e770..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NAPS-ai/NAPS-ai_naps-llama-3_1-instruct-v0.5.0/5553fa1d-6bf9-469d-b870-590dd4965209.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-llama-3_1-instruct-v0.5.0/1762652579.766381",
- "retrieved_timestamp": "1762652579.766382",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NAPS-ai/naps-llama-3_1-instruct-v0.5.0",
- "developer": "NAPS-ai",
- "inference_platform": "unknown",
- "id": "NAPS-ai/naps-llama-3_1-instruct-v0.5.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5020124381086628
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4147584365689691
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03625377643504532
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37127083333333327
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26138630319148937
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NCSOFT/NCSOFT_Llama-VARCO-8B-Instruct/38876858-0585-4edb-a4af-e4c71530429c.json b/leaderboard_data/HFOpenLLMv2/NCSOFT/NCSOFT_Llama-VARCO-8B-Instruct/38876858-0585-4edb-a4af-e4c71530429c.json
deleted file mode 100644
index 80f4e3594a16d3c8676bbc300466fd7c2cb6f84e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NCSOFT/NCSOFT_Llama-VARCO-8B-Instruct/38876858-0585-4edb-a4af-e4c71530429c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NCSOFT_Llama-VARCO-8B-Instruct/1762652579.767406",
- "retrieved_timestamp": "1762652579.7674072",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NCSOFT/Llama-VARCO-8B-Instruct",
- "developer": "NCSOFT",
- "inference_platform": "unknown",
- "id": "NCSOFT/Llama-VARCO-8B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4470327619604871
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5022879316026018
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10649546827794562
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3840729166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31898271276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NJS26/NJS26_NJS_777/211449c7-9b14-4d20-a599-58718e9c5e4b.json b/leaderboard_data/HFOpenLLMv2/NJS26/NJS26_NJS_777/211449c7-9b14-4d20-a599-58718e9c5e4b.json
deleted file mode 100644
index ce7a68af85e466e5bf3da67666bd68197f46b2a2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NJS26/NJS26_NJS_777/211449c7-9b14-4d20-a599-58718e9c5e4b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NJS26_NJS_777/1762652579.76769",
- "retrieved_timestamp": "1762652579.76769",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NJS26/NJS_777",
- "developer": "NJS26",
- "inference_platform": "unknown",
- "id": "NJS26/NJS_777"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18809647291409015
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21782097894078087
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2063758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35378125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11627327127659574
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 10.362
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NLPark/NLPark_AnFeng_v3.1-Avocet/17b3cc41-69ac-48a2-9371-a5d1368dfeb9.json b/leaderboard_data/HFOpenLLMv2/NLPark/NLPark_AnFeng_v3.1-Avocet/17b3cc41-69ac-48a2-9371-a5d1368dfeb9.json
deleted file mode 100644
index b2e36791666f6d586660d7a44e01a1eaf95b1f92..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NLPark/NLPark_AnFeng_v3.1-Avocet/17b3cc41-69ac-48a2-9371-a5d1368dfeb9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NLPark_AnFeng_v3.1-Avocet/1762652579.76799",
- "retrieved_timestamp": "1762652579.767991",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NLPark/AnFeng_v3.1-Avocet",
- "developer": "NLPark",
- "inference_platform": "unknown",
- "id": "NLPark/AnFeng_v3.1-Avocet"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5096311121158525
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.582852329074409
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1593655589123867
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44757291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44381648936170215
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.393
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NLPark/NLPark_B-and-W_Flycatcher-3AD1E/95b94fcb-7aba-4473-b88f-36dddcd646c1.json b/leaderboard_data/HFOpenLLMv2/NLPark/NLPark_B-and-W_Flycatcher-3AD1E/95b94fcb-7aba-4473-b88f-36dddcd646c1.json
deleted file mode 100644
index 962e28d6f49071da9b65810231918009d425643e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NLPark/NLPark_B-and-W_Flycatcher-3AD1E/95b94fcb-7aba-4473-b88f-36dddcd646c1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NLPark_B-and-W_Flycatcher-3AD1E/1762652579.7682638",
- "retrieved_timestamp": "1762652579.768265",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NLPark/B-and-W_Flycatcher-3AD1E",
- "developer": "NLPark",
- "inference_platform": "unknown",
- "id": "NLPark/B-and-W_Flycatcher-3AD1E"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49084650948372543
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6065117528534355
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23791540785498488
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44227083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4740691489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NLPark/NLPark_Shi-Ci-Robin-Test_3AD80/0fa6785d-8db5-40f9-b259-3368ffb547d4.json b/leaderboard_data/HFOpenLLMv2/NLPark/NLPark_Shi-Ci-Robin-Test_3AD80/0fa6785d-8db5-40f9-b259-3368ffb547d4.json
deleted file mode 100644
index 0863d4fe19b956434b2aeea78ebed1cabfb180d3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NLPark/NLPark_Shi-Ci-Robin-Test_3AD80/0fa6785d-8db5-40f9-b259-3368ffb547d4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NLPark_Shi-Ci-Robin-Test_3AD80/1762652579.768489",
- "retrieved_timestamp": "1762652579.76849",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NLPark/Shi-Ci-Robin-Test_3AD80",
- "developer": "NLPark",
- "inference_platform": "unknown",
- "id": "NLPark/Shi-Ci-Robin-Test_3AD80"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7226547782107031
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6704805157570325
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3157099697885196
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3598993288590604
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46959375000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5120511968085106
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NTQAI/NTQAI_NxMobileLM-1.5B-SFT/7a295af9-fb47-484f-8748-af3ee245d2c5.json b/leaderboard_data/HFOpenLLMv2/NTQAI/NTQAI_NxMobileLM-1.5B-SFT/7a295af9-fb47-484f-8748-af3ee245d2c5.json
deleted file mode 100644
index 421e966bde6c943deb430e7bb1a94b017f6c066a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NTQAI/NTQAI_NxMobileLM-1.5B-SFT/7a295af9-fb47-484f-8748-af3ee245d2c5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NTQAI_NxMobileLM-1.5B-SFT/1762652579.768717",
- "retrieved_timestamp": "1762652579.768718",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NTQAI/NxMobileLM-1.5B-SFT",
- "developer": "NTQAI",
- "inference_platform": "unknown",
- "id": "NTQAI/NxMobileLM-1.5B-SFT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6392239258500778
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39571778048116
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08459214501510574
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35552083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28174867021276595
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NTQAI/NTQAI_Nxcode-CQ-7B-orpo/1c020e50-fe68-40c9-a36a-7bec201f409a.json b/leaderboard_data/HFOpenLLMv2/NTQAI/NTQAI_Nxcode-CQ-7B-orpo/1c020e50-fe68-40c9-a36a-7bec201f409a.json
deleted file mode 100644
index 10c0b2dae074074f28448837231ea9a83ab9c504..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NTQAI/NTQAI_Nxcode-CQ-7B-orpo/1c020e50-fe68-40c9-a36a-7bec201f409a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NTQAI_Nxcode-CQ-7B-orpo/1762652579.769034",
- "retrieved_timestamp": "1762652579.769035",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NTQAI/Nxcode-CQ-7B-orpo",
- "developer": "NTQAI",
- "inference_platform": "unknown",
- "id": "NTQAI/Nxcode-CQ-7B-orpo"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40072119753365515
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4143023249178217
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02190332326283988
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25419463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39396875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16115359042553193
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.25
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NYTK/NYTK_PULI-LlumiX-32K/7230c1f3-d7f6-4a96-8308-b2d5895a0a0a.json b/leaderboard_data/HFOpenLLMv2/NYTK/NYTK_PULI-LlumiX-32K/7230c1f3-d7f6-4a96-8308-b2d5895a0a0a.json
deleted file mode 100644
index b2d6ee0977b03f08115b9fa5a8d789cff0ab0afe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NYTK/NYTK_PULI-LlumiX-32K/7230c1f3-d7f6-4a96-8308-b2d5895a0a0a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NYTK_PULI-LlumiX-32K/1762652579.76952",
- "retrieved_timestamp": "1762652579.769521",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NYTK/PULI-LlumiX-32K",
- "developer": "NYTK",
- "inference_platform": "unknown",
- "id": "NYTK/PULI-LlumiX-32K"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1699612583500667
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31893582242949375
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01283987915407855
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39641666666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16805186170212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.738
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NbAiLab/NbAiLab_nb-llama-3.1-8B-Instruct/b0f68843-2f49-4d2a-91ab-ad8d07791125.json b/leaderboard_data/HFOpenLLMv2/NbAiLab/NbAiLab_nb-llama-3.1-8B-Instruct/b0f68843-2f49-4d2a-91ab-ad8d07791125.json
deleted file mode 100644
index 229512a3707d0f4227d5f8670bca47ae85c564e9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NbAiLab/NbAiLab_nb-llama-3.1-8B-Instruct/b0f68843-2f49-4d2a-91ab-ad8d07791125.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NbAiLab_nb-llama-3.1-8B-Instruct/1762652579.7700322",
- "retrieved_timestamp": "1762652579.770033",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NbAiLab/nb-llama-3.1-8B-Instruct",
- "developer": "NbAiLab",
- "inference_platform": "unknown",
- "id": "NbAiLab/nb-llama-3.1-8B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.362502604201297
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466553135589526
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.022658610271903322
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32076041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1196808510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NbAiLab/NbAiLab_nb-llama-3.1-8B-sft/e8313b88-13ee-4926-90f8-696b0604c7b9.json b/leaderboard_data/HFOpenLLMv2/NbAiLab/NbAiLab_nb-llama-3.1-8B-sft/e8313b88-13ee-4926-90f8-696b0604c7b9.json
deleted file mode 100644
index e7f8f963c548471cd56e1af05b5bfe5d08877dfb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NbAiLab/NbAiLab_nb-llama-3.1-8B-sft/e8313b88-13ee-4926-90f8-696b0604c7b9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NbAiLab_nb-llama-3.1-8B-sft/1762652579.7703218",
- "retrieved_timestamp": "1762652579.770323",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NbAiLab/nb-llama-3.1-8B-sft",
- "developer": "NbAiLab",
- "inference_platform": "unknown",
- "id": "NbAiLab/nb-llama-3.1-8B-sft"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36157838978355206
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3281509048328078
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02190332326283988
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25419463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3287291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12217420212765957
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Nekochu/Nekochu_Llama-3.1-8B-french-DPO/ebc2a3b7-30e9-4608-a8c0-ea90a308c0e5.json b/leaderboard_data/HFOpenLLMv2/Nekochu/Nekochu_Llama-3.1-8B-french-DPO/ebc2a3b7-30e9-4608-a8c0-ea90a308c0e5.json
deleted file mode 100644
index 2ba1a11a41f3f85afa7fb8e47dc9a9c6cc3cca97..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Nekochu/Nekochu_Llama-3.1-8B-french-DPO/ebc2a3b7-30e9-4608-a8c0-ea90a308c0e5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Nekochu_Llama-3.1-8B-french-DPO/1762652579.770777",
- "retrieved_timestamp": "1762652579.7707782",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Nekochu/Llama-3.1-8B-french-DPO",
- "developer": "Nekochu",
- "inference_platform": "unknown",
- "id": "Nekochu/Llama-3.1-8B-french-DPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46564227361179444
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5110888403999433
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09743202416918428
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4215625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3414228723404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Nekochu/Nekochu_Luminia-13B-v3/172f121a-3843-4b01-94e1-a95001909bb8.json b/leaderboard_data/HFOpenLLMv2/Nekochu/Nekochu_Luminia-13B-v3/172f121a-3843-4b01-94e1-a95001909bb8.json
deleted file mode 100644
index c92d93d964c9db3bc0ecf18b1166605981ec48d4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Nekochu/Nekochu_Luminia-13B-v3/172f121a-3843-4b01-94e1-a95001909bb8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Nekochu_Luminia-13B-v3/1762652579.771023",
- "retrieved_timestamp": "1762652579.771023",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Nekochu/Luminia-13B-v3",
- "developer": "Nekochu",
- "inference_platform": "unknown",
- "id": "Nekochu/Luminia-13B-v3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25231829323971505
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41121515510929624
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01812688821752266
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3983333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22149268617021275
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 13.016
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Nekochu/Nekochu_Luminia-8B-RP/fd23ba4a-a0ce-474b-9aa4-b5295d872028.json b/leaderboard_data/HFOpenLLMv2/Nekochu/Nekochu_Luminia-8B-RP/fd23ba4a-a0ce-474b-9aa4-b5295d872028.json
deleted file mode 100644
index 9a50f5bfaca2b2456b93dc45025aa4730f6452fe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Nekochu/Nekochu_Luminia-8B-RP/fd23ba4a-a0ce-474b-9aa4-b5295d872028.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Nekochu_Luminia-8B-RP/1762652579.7713962",
- "retrieved_timestamp": "1762652579.7713978",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Nekochu/Luminia-8B-RP",
- "developer": "Nekochu",
- "inference_platform": "unknown",
- "id": "Nekochu/Luminia-8B-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5574165436597118
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5218151030627874
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13595166163141995
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3997604166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3631150265957447
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NeverSleep/NeverSleep_Lumimaid-v0.2-12B/cee1293c-54fb-4275-b5a9-0215e5f9a4c0.json b/leaderboard_data/HFOpenLLMv2/NeverSleep/NeverSleep_Lumimaid-v0.2-12B/cee1293c-54fb-4275-b5a9-0215e5f9a4c0.json
deleted file mode 100644
index b9bdbd6217988ef607d8154e502b996b06fca99d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NeverSleep/NeverSleep_Lumimaid-v0.2-12B/cee1293c-54fb-4275-b5a9-0215e5f9a4c0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NeverSleep_Lumimaid-v0.2-12B/1762652579.771668",
- "retrieved_timestamp": "1762652579.771669",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NeverSleep/Lumimaid-v0.2-12B",
- "developer": "NeverSleep",
- "inference_platform": "unknown",
- "id": "NeverSleep/Lumimaid-v0.2-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10993497253952846
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5395610525850818
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05664652567975831
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145973154362416
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48211458333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3511469414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NeverSleep/NeverSleep_Lumimaid-v0.2-8B/6d7f1ac9-66c8-4700-87a9-0e413fc8878e.json b/leaderboard_data/HFOpenLLMv2/NeverSleep/NeverSleep_Lumimaid-v0.2-8B/6d7f1ac9-66c8-4700-87a9-0e413fc8878e.json
deleted file mode 100644
index 95f35860e0c45a0572a6503a80dec45848210388..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NeverSleep/NeverSleep_Lumimaid-v0.2-8B/6d7f1ac9-66c8-4700-87a9-0e413fc8878e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NeverSleep_Lumimaid-v0.2-8B/1762652579.771939",
- "retrieved_timestamp": "1762652579.771939",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NeverSleep/Lumimaid-v0.2-8B",
- "developer": "NeverSleep",
- "inference_platform": "unknown",
- "id": "NeverSleep/Lumimaid-v0.2-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5038109992597419
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5237767601226618
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14350453172205438
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4303020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36361369680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Nexesenex/Nexesenex_Nemotron_W_4b_Halo_0.1/243b045a-8442-41fd-a483-e4e25b771048.json b/leaderboard_data/HFOpenLLMv2/Nexesenex/Nexesenex_Nemotron_W_4b_Halo_0.1/243b045a-8442-41fd-a483-e4e25b771048.json
deleted file mode 100644
index 9115ad10e55889db80f513e375e51110a277eed6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Nexesenex/Nexesenex_Nemotron_W_4b_Halo_0.1/243b045a-8442-41fd-a483-e4e25b771048.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Nexesenex_Nemotron_W_4b_Halo_0.1/1762652579.78175",
- "retrieved_timestamp": "1762652579.7817512",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Nexesenex/Nemotron_W_4b_Halo_0.1",
- "developer": "Nexesenex",
- "inference_platform": "unknown",
- "id": "Nexesenex/Nemotron_W_4b_Halo_0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3627275628665275
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4135101667655742
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04229607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28020134228187926
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41651041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25049867021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 4.513
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Nexesenex/Nexesenex_Nemotron_W_4b_MagLight_0.1/2f3f0dcb-a62d-44bd-b86d-c1f403d5b833.json b/leaderboard_data/HFOpenLLMv2/Nexesenex/Nexesenex_Nemotron_W_4b_MagLight_0.1/2f3f0dcb-a62d-44bd-b86d-c1f403d5b833.json
deleted file mode 100644
index 1308d91299cac2baf9c814a9894f419ba523c31d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Nexesenex/Nexesenex_Nemotron_W_4b_MagLight_0.1/2f3f0dcb-a62d-44bd-b86d-c1f403d5b833.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Nexesenex_Nemotron_W_4b_MagLight_0.1/1762652579.781992",
- "retrieved_timestamp": "1762652579.781993",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Nexesenex/Nemotron_W_4b_MagLight_0.1",
- "developer": "Nexesenex",
- "inference_platform": "unknown",
- "id": "Nexesenex/Nemotron_W_4b_MagLight_0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4230275668559422
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42314083807225433
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04003021148036254
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41120833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2544880319148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 4.513
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Nexesenex/Nexesenex_pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/318afc06-f294-4253-b1c9-173a7f56083b.json b/leaderboard_data/HFOpenLLMv2/Nexesenex/Nexesenex_pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/318afc06-f294-4253-b1c9-173a7f56083b.json
deleted file mode 100644
index 35d7bd691f27c3b4ef4c7e1d919f815ce64052ae..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Nexesenex/Nexesenex_pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/318afc06-f294-4253-b1c9-173a7f56083b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Nexesenex_pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL/1762652579.7826922",
- "retrieved_timestamp": "1762652579.7826939",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL",
- "developer": "Nexesenex",
- "inference_platform": "unknown",
- "id": "Nexesenex/pankajmathur_orca_mini_v9_6_1B-instruct-Abliterated-LPL"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5889905450870357
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3562492190965966
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07477341389728095
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33955208333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1802692819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.236
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Nexusflow/Nexusflow_NexusRaven-V2-13B/f5e5662e-803e-4f1f-82e7-14a2a189ed6d.json b/leaderboard_data/HFOpenLLMv2/Nexusflow/Nexusflow_NexusRaven-V2-13B/f5e5662e-803e-4f1f-82e7-14a2a189ed6d.json
deleted file mode 100644
index 156c68fee87b606ce685d918592150a884c7fa97..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Nexusflow/Nexusflow_NexusRaven-V2-13B/f5e5662e-803e-4f1f-82e7-14a2a189ed6d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Nexusflow_NexusRaven-V2-13B/1762652579.782948",
- "retrieved_timestamp": "1762652579.7829492",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Nexusflow/NexusRaven-V2-13B",
- "developer": "Nexusflow",
- "inference_platform": "unknown",
- "id": "Nexusflow/NexusRaven-V2-13B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1790781792311068
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39488604640507335
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02945619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3736875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18716755319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 13.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NikolaSigmoid/NikolaSigmoid_AceMath-1.5B-Instruct-1epoch/0cf3db2f-9b23-4602-ac92-265bafd36410.json b/leaderboard_data/HFOpenLLMv2/NikolaSigmoid/NikolaSigmoid_AceMath-1.5B-Instruct-1epoch/0cf3db2f-9b23-4602-ac92-265bafd36410.json
deleted file mode 100644
index b6f521bc642f6aa6c18dab8db4170acf086a0420..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NikolaSigmoid/NikolaSigmoid_AceMath-1.5B-Instruct-1epoch/0cf3db2f-9b23-4602-ac92-265bafd36410.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NikolaSigmoid_AceMath-1.5B-Instruct-1epoch/1762652579.783191",
- "retrieved_timestamp": "1762652579.7831922",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NikolaSigmoid/AceMath-1.5B-Instruct-1epoch",
- "developer": "NikolaSigmoid",
- "inference_platform": "unknown",
- "id": "NikolaSigmoid/AceMath-1.5B-Instruct-1epoch"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2848918646967823
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.426284784119477
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30513595166163143
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39251041666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23761635638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.791
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NikolaSigmoid/NikolaSigmoid_AceMath-1.5B-Instruct-dolphin-r1-200/93f56942-30d8-4a0f-af8d-901fb264436c.json b/leaderboard_data/HFOpenLLMv2/NikolaSigmoid/NikolaSigmoid_AceMath-1.5B-Instruct-dolphin-r1-200/93f56942-30d8-4a0f-af8d-901fb264436c.json
deleted file mode 100644
index b3b9181ee5c95aa3eec845ecc29903ebc2da4ac9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NikolaSigmoid/NikolaSigmoid_AceMath-1.5B-Instruct-dolphin-r1-200/93f56942-30d8-4a0f-af8d-901fb264436c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NikolaSigmoid_AceMath-1.5B-Instruct-dolphin-r1-200/1762652579.783446",
- "retrieved_timestamp": "1762652579.783447",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200",
- "developer": "NikolaSigmoid",
- "inference_platform": "unknown",
- "id": "NikolaSigmoid/AceMath-1.5B-Instruct-dolphin-r1-200"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18080249294095221
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28148007801214714
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2558724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37495833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11427859042553191
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.928
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NikolaSigmoid/NikolaSigmoid_acemath-200/4414a96e-0664-4531-9c0f-3eb4a062fbe2.json b/leaderboard_data/HFOpenLLMv2/NikolaSigmoid/NikolaSigmoid_acemath-200/4414a96e-0664-4531-9c0f-3eb4a062fbe2.json
deleted file mode 100644
index 06b5c1c799c33c5d3263abfa555ec96c93deb4a5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NikolaSigmoid/NikolaSigmoid_acemath-200/4414a96e-0664-4531-9c0f-3eb4a062fbe2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NikolaSigmoid_acemath-200/1762652579.783974",
- "retrieved_timestamp": "1762652579.783974",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NikolaSigmoid/acemath-200",
- "developer": "NikolaSigmoid",
- "inference_platform": "unknown",
- "id": "NikolaSigmoid/acemath-200"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2848918646967823
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.426284784119477
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30513595166163143
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39251041666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23761635638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.791
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris-BMO_Violent-GRPO-v0.420/e841483e-042b-4a2a-8dbc-9ed7529f7618.json b/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris-BMO_Violent-GRPO-v0.420/e841483e-042b-4a2a-8dbc-9ed7529f7618.json
deleted file mode 100644
index 551aeb0c7d4169794286fff269bd335c0e51ec22..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris-BMO_Violent-GRPO-v0.420/e841483e-042b-4a2a-8dbc-9ed7529f7618.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris-BMO_Violent-GRPO-v0.420/1762652579.784868",
- "retrieved_timestamp": "1762652579.7848692",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420",
- "developer": "Nitral-AI",
- "inference_platform": "unknown",
- "id": "Nitral-AI/Captain-Eris-BMO_Violent-GRPO-v0.420"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6312805578088361
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5078530730075063
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13141993957703926
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4228020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.359624335106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris_BMO-Violent-12B/ebcd5d63-5c91-41d1-b9e2-0bafe7170000.json b/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris_BMO-Violent-12B/ebcd5d63-5c91-41d1-b9e2-0bafe7170000.json
deleted file mode 100644
index 8f66107eb01315ebc4f550e38b4b30e6afd71cc3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris_BMO-Violent-12B/ebcd5d63-5c91-41d1-b9e2-0bafe7170000.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris_BMO-Violent-12B/1762652579.785123",
- "retrieved_timestamp": "1762652579.785124",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Nitral-AI/Captain-Eris_BMO-Violent-12B",
- "developer": "Nitral-AI",
- "inference_platform": "unknown",
- "id": "Nitral-AI/Captain-Eris_BMO-Violent-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.615218730745533
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5104372825851065
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13670694864048338
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42553124999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35713098404255317
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris_Violet-GRPO-v0.420/cf030461-1234-48ce-a025-ba0f52cdf191.json b/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris_Violet-GRPO-v0.420/cf030461-1234-48ce-a025-ba0f52cdf191.json
deleted file mode 100644
index 08f53d583a71c72a0082ee2df7ec5105a75c89b0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris_Violet-GRPO-v0.420/cf030461-1234-48ce-a025-ba0f52cdf191.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris_Violet-GRPO-v0.420/1762652579.785343",
- "retrieved_timestamp": "1762652579.785344",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Nitral-AI/Captain-Eris_Violet-GRPO-v0.420",
- "developer": "Nitral-AI",
- "inference_platform": "unknown",
- "id": "Nitral-AI/Captain-Eris_Violet-GRPO-v0.420"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6261597007052399
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.515921407165298
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10800604229607251
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42791666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35347406914893614
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris_Violet-V0.420-12B/ad87ba77-99a9-463f-aea3-1d29fc0317b0.json b/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris_Violet-V0.420-12B/ad87ba77-99a9-463f-aea3-1d29fc0317b0.json
deleted file mode 100644
index f6777e5b9a9e8e12a06bbabcd7590a4fbd5d78dd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain-Eris_Violet-V0.420-12B/ad87ba77-99a9-463f-aea3-1d29fc0317b0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain-Eris_Violet-V0.420-12B/1762652579.785556",
- "retrieved_timestamp": "1762652579.785557",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Nitral-AI/Captain-Eris_Violet-V0.420-12B",
- "developer": "Nitral-AI",
- "inference_platform": "unknown",
- "id": "Nitral-AI/Captain-Eris_Violet-V0.420-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43391866913123844
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5478099417611365
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10725075528700906
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43306249999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3722573138297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain_BMO-12B/6fed7e5b-9692-40f7-913e-fc3b57b8c72a.json b/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain_BMO-12B/6fed7e5b-9692-40f7-913e-fc3b57b8c72a.json
deleted file mode 100644
index 33861e469525e801aaa68000cea020e3b49cb445..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Captain_BMO-12B/6fed7e5b-9692-40f7-913e-fc3b57b8c72a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Nitral-AI_Captain_BMO-12B/1762652579.7857668",
- "retrieved_timestamp": "1762652579.7857668",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Nitral-AI/Captain_BMO-12B",
- "developer": "Nitral-AI",
- "inference_platform": "unknown",
- "id": "Nitral-AI/Captain_BMO-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4750595087700634
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5285960650424973
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13972809667673716
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37480208333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3568816489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Hathor_Stable-v0.2-L3-8B/2bb06e2f-9aee-4ac4-b9a6-fe537c2c9890.json b/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Hathor_Stable-v0.2-L3-8B/2bb06e2f-9aee-4ac4-b9a6-fe537c2c9890.json
deleted file mode 100644
index e3b1d6413fd50d2d76d467ec5936bbac8bb93413..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Hathor_Stable-v0.2-L3-8B/2bb06e2f-9aee-4ac4-b9a6-fe537c2c9890.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Nitral-AI_Hathor_Stable-v0.2-L3-8B/1762652579.7859662",
- "retrieved_timestamp": "1762652579.785967",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Nitral-AI/Hathor_Stable-v0.2-L3-8B",
- "developer": "Nitral-AI",
- "inference_platform": "unknown",
- "id": "Nitral-AI/Hathor_Stable-v0.2-L3-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7174840534226963
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5285819178301682
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10498489425981873
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3780625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36959773936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Hathor_Tahsin-L3-8B-v0.85/a73461e6-a1f4-43c9-9a0f-f03c9be46276.json b/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Hathor_Tahsin-L3-8B-v0.85/a73461e6-a1f4-43c9-9a0f-f03c9be46276.json
deleted file mode 100644
index 74b9a7bdef40b8dfa846952c82bb5abde8bfb606..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Hathor_Tahsin-L3-8B-v0.85/a73461e6-a1f4-43c9-9a0f-f03c9be46276.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Nitral-AI_Hathor_Tahsin-L3-8B-v0.85/1762652579.786179",
- "retrieved_timestamp": "1762652579.78618",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Nitral-AI/Hathor_Tahsin-L3-8B-v0.85",
- "developer": "Nitral-AI",
- "inference_platform": "unknown",
- "id": "Nitral-AI/Hathor_Tahsin-L3-8B-v0.85"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7110145524984818
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5279036861109899
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10045317220543806
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3646666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37200797872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Nera_Noctis-12B/2f5caa38-56e9-4740-baca-22fb02e57150.json b/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Nera_Noctis-12B/2f5caa38-56e9-4740-baca-22fb02e57150.json
deleted file mode 100644
index a5243d0dc55c80dca2de1700f7cae7badc90893f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Nitral-AI/Nitral-AI_Nera_Noctis-12B/2f5caa38-56e9-4740-baca-22fb02e57150.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Nitral-AI_Nera_Noctis-12B/1762652579.786392",
- "retrieved_timestamp": "1762652579.7863932",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Nitral-AI/Nera_Noctis-12B",
- "developer": "Nitral-AI",
- "inference_platform": "unknown",
- "id": "Nitral-AI/Nera_Noctis-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45617517076911485
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5193675192746302
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08761329305135952
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39790624999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3468251329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Nohobby/Nohobby_MS-Schisandra-22B-v0.1/9836e2c7-30df-421d-bf02-d4434f97d990.json b/leaderboard_data/HFOpenLLMv2/Nohobby/Nohobby_MS-Schisandra-22B-v0.1/9836e2c7-30df-421d-bf02-d4434f97d990.json
deleted file mode 100644
index 067c99d8b732b8069f45fc49b05b17135751c014..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Nohobby/Nohobby_MS-Schisandra-22B-v0.1/9836e2c7-30df-421d-bf02-d4434f97d990.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Nohobby_MS-Schisandra-22B-v0.1/1762652579.786606",
- "retrieved_timestamp": "1762652579.786607",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Nohobby/MS-Schisandra-22B-v0.1",
- "developer": "Nohobby",
- "inference_platform": "unknown",
- "id": "Nohobby/MS-Schisandra-22B-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6331289866443259
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5789949714896523
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22280966767371602
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33221476510067116
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39284375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4095744680851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 22.247
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Nohobby/Nohobby_MS-Schisandra-22B-v0.2/9a263094-fb31-43b9-9307-6ae5f64f82c0.json b/leaderboard_data/HFOpenLLMv2/Nohobby/Nohobby_MS-Schisandra-22B-v0.2/9a263094-fb31-43b9-9307-6ae5f64f82c0.json
deleted file mode 100644
index ff6333620edd12c51381c0959339e1bc717d4511..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Nohobby/Nohobby_MS-Schisandra-22B-v0.2/9a263094-fb31-43b9-9307-6ae5f64f82c0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Nohobby_MS-Schisandra-22B-v0.2/1762652579.78686",
- "retrieved_timestamp": "1762652579.786861",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Nohobby/MS-Schisandra-22B-v0.2",
- "developer": "Nohobby",
- "inference_platform": "unknown",
- "id": "Nohobby/MS-Schisandra-22B-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6382997114323329
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5841215984231857
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20317220543806647
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33557046979865773
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40747916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4136469414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 22.247
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Alpha/6ce53368-e6b5-45a1-a997-ca5468f27c13.json b/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Alpha/6ce53368-e6b5-45a1-a997-ca5468f27c13.json
deleted file mode 100644
index 42cae64503cc2c7808ce4d67ad06986ec6d407ac..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Alpha/6ce53368-e6b5-45a1-a997-ca5468f27c13.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Norquinal_Alpha/1762652579.787071",
- "retrieved_timestamp": "1762652579.787072",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Norquinal/Alpha",
- "developer": "Norquinal",
- "inference_platform": "unknown",
- "id": "Norquinal/Alpha"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802951723648808
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3373652507108038
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05740181268882175
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36308333333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30028257978723405
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Bravo/dbdae48e-5023-453f-b15f-cf779068e030.json b/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Bravo/dbdae48e-5023-453f-b15f-cf779068e030.json
deleted file mode 100644
index a1f7c4934858c04221d4e6509b50bfc7e09dcf2b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Bravo/dbdae48e-5023-453f-b15f-cf779068e030.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Norquinal_Bravo/1762652579.787321",
- "retrieved_timestamp": "1762652579.787322",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Norquinal/Bravo",
- "developer": "Norquinal",
- "inference_platform": "unknown",
- "id": "Norquinal/Bravo"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3024519386339357
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3558431980261287
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05740181268882175
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28187919463087246
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38686458333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.312749335106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Charlie/31f784e4-bded-48d8-b7a6-7936b5d21d9e.json b/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Charlie/31f784e4-bded-48d8-b7a6-7936b5d21d9e.json
deleted file mode 100644
index 92c8e05aaa62324f05f3fd6e9031f461e909e51f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Charlie/31f784e4-bded-48d8-b7a6-7936b5d21d9e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Norquinal_Charlie/1762652579.787528",
- "retrieved_timestamp": "1762652579.787528",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Norquinal/Charlie",
- "developer": "Norquinal",
- "inference_platform": "unknown",
- "id": "Norquinal/Charlie"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3060989286205047
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3515288346438244
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0581570996978852
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3736875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30925864361702127
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Delta/684a3a6e-c74d-456f-b80e-c099b8c9973c.json b/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Delta/684a3a6e-c74d-456f-b80e-c099b8c9973c.json
deleted file mode 100644
index b9f2540c526e838b32a64eae12b912e5f8cc5b4e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Delta/684a3a6e-c74d-456f-b80e-c099b8c9973c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Norquinal_Delta/1762652579.78773",
- "retrieved_timestamp": "1762652579.787731",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Norquinal/Delta",
- "developer": "Norquinal",
- "inference_platform": "unknown",
- "id": "Norquinal/Delta"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.253842028041153
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3434783285415976
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06117824773413897
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3776875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2958776595744681
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Echo/f2f250f7-8cb0-4076-b2f0-7cf8ee911532.json b/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Echo/f2f250f7-8cb0-4076-b2f0-7cf8ee911532.json
deleted file mode 100644
index ce746cafeb2810cb2a6471f2da82987f4cd61ecb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Echo/f2f250f7-8cb0-4076-b2f0-7cf8ee911532.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Norquinal_Echo/1762652579.787929",
- "retrieved_timestamp": "1762652579.787929",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Norquinal/Echo",
- "developer": "Norquinal",
- "inference_platform": "unknown",
- "id": "Norquinal/Echo"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31579099012841483
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35304654390055795
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05740181268882175
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3804479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30950797872340424
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Foxtrot/2a4428d4-a6c9-427c-ba67-72f08b590b8e.json b/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Foxtrot/2a4428d4-a6c9-427c-ba67-72f08b590b8e.json
deleted file mode 100644
index 19ac8fdf54917d6b9446c954167255f8bcef8995..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Foxtrot/2a4428d4-a6c9-427c-ba67-72f08b590b8e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Norquinal_Foxtrot/1762652579.788121",
- "retrieved_timestamp": "1762652579.788121",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Norquinal/Foxtrot",
- "developer": "Norquinal",
- "inference_platform": "unknown",
- "id": "Norquinal/Foxtrot"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011531624977283
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3558026577191667
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0581570996978852
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3804166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30501994680851063
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Golf/dfdcfbfa-c023-40bf-b5e3-632b45f28aab.json b/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Golf/dfdcfbfa-c023-40bf-b5e3-632b45f28aab.json
deleted file mode 100644
index 3bd2bad02d33f25160fb68cde5b4b403cc93eee2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Golf/dfdcfbfa-c023-40bf-b5e3-632b45f28aab.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Norquinal_Golf/1762652579.788314",
- "retrieved_timestamp": "1762652579.7883148",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Norquinal/Golf",
- "developer": "Norquinal",
- "inference_platform": "unknown",
- "id": "Norquinal/Golf"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3533601953926692
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35332648991705207
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05362537764350453
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.338
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30560172872340424
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Hotel/f91abb9a-6690-4fec-b1a7-f519dfe66d24.json b/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Hotel/f91abb9a-6690-4fec-b1a7-f519dfe66d24.json
deleted file mode 100644
index e913776d4505ff15a9c17a0715ad6fcf290c7bdc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Norquinal/Norquinal_Hotel/f91abb9a-6690-4fec-b1a7-f519dfe66d24.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Norquinal_Hotel/1762652579.788509",
- "retrieved_timestamp": "1762652579.7885098",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Norquinal/Hotel",
- "developer": "Norquinal",
- "inference_platform": "unknown",
- "id": "Norquinal/Hotel"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3215113676157041
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36785702492059275
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.052870090634441085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3288229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3156582446808511
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-Mistral-7B-DPO/877421ae-8135-485f-805e-489ed70dc886.json b/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-Mistral-7B-DPO/877421ae-8135-485f-805e-489ed70dc886.json
deleted file mode 100644
index f5cd822c3b9be4dbdb7f08cf4492e4be8cc26adb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-Mistral-7B-DPO/877421ae-8135-485f-805e-489ed70dc886.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-Mistral-7B-DPO/1762652579.7912042",
- "retrieved_timestamp": "1762652579.7912052",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
- "developer": "NousResearch",
- "inference_platform": "unknown",
- "id": "NousResearch/Nous-Hermes-2-Mistral-7B-DPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5762510139762497
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48526536654652347
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04758308157099697
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3999791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3015292553191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-Mixtral-8x7B-DPO/bc2d14fe-000a-40ce-a57c-c00fe584a7e4.json b/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-Mixtral-8x7B-DPO/bc2d14fe-000a-40ce-a57c-c00fe584a7e4.json
deleted file mode 100644
index 47c9b778617de8bc98abbd2b592dde312d9271c7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-Mixtral-8x7B-DPO/bc2d14fe-000a-40ce-a57c-c00fe584a7e4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-Mixtral-8x7B-DPO/1762652579.791439",
- "retrieved_timestamp": "1762652579.7914398",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
- "developer": "NousResearch",
- "inference_platform": "unknown",
- "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5896898008395501
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5538851384033822
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12235649546827794
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4595416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3666057180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 46.703
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-Mixtral-8x7B-SFT/3c196d70-44ad-419c-8c4c-80fc7f184687.json b/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-Mixtral-8x7B-SFT/3c196d70-44ad-419c-8c4c-80fc7f184687.json
deleted file mode 100644
index db5eed83d6bfda86ac93168d0669b58e2c9d0a1a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-Mixtral-8x7B-SFT/3c196d70-44ad-419c-8c4c-80fc7f184687.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-Mixtral-8x7B-SFT/1762652579.791643",
- "retrieved_timestamp": "1762652579.7916439",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT",
- "developer": "NousResearch",
- "inference_platform": "unknown",
- "id": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5730783210769648
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5057868454026635
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.021148036253776436
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.421375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30659906914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 46.703
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-SOLAR-10.7B/80a7b60b-77f7-4dbf-96c8-071c56179fec.json b/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-SOLAR-10.7B/80a7b60b-77f7-4dbf-96c8-071c56179fec.json
deleted file mode 100644
index 63613e92ed13f5ed025b93ff79b3332a3fa85a46..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Nous-Hermes-2-SOLAR-10.7B/80a7b60b-77f7-4dbf-96c8-071c56179fec.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NousResearch_Nous-Hermes-2-SOLAR-10.7B/1762652579.791853",
- "retrieved_timestamp": "1762652579.7918541",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NousResearch/Nous-Hermes-2-SOLAR-10.7B",
- "developer": "NousResearch",
- "inference_platform": "unknown",
- "id": "NousResearch/Nous-Hermes-2-SOLAR-10.7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5278660620486975
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5414294841140173
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05740181268882175
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43728125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3458277925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.732
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Yarn-Solar-10b-32k/a18a259d-1795-4848-94fd-3b9c3abfb9da.json b/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Yarn-Solar-10b-32k/a18a259d-1795-4848-94fd-3b9c3abfb9da.json
deleted file mode 100644
index fdf37d0246c6fd3c937eca986000b9d996e1cb7d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Yarn-Solar-10b-32k/a18a259d-1795-4848-94fd-3b9c3abfb9da.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Solar-10b-32k/1762652579.793437",
- "retrieved_timestamp": "1762652579.793438",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NousResearch/Yarn-Solar-10b-32k",
- "developer": "NousResearch",
- "inference_platform": "unknown",
- "id": "NousResearch/Yarn-Solar-10b-32k"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19421579187666504
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4986859152325069
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.030211480362537766
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4146458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32721077127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Yarn-Solar-10b-64k/1904c811-34ae-4f52-9978-622bc6dd6f2e.json b/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Yarn-Solar-10b-64k/1904c811-34ae-4f52-9978-622bc6dd6f2e.json
deleted file mode 100644
index c57b84d30144f4eb257041c9a03b33c338575eb5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NousResearch/NousResearch_Yarn-Solar-10b-64k/1904c811-34ae-4f52-9978-622bc6dd6f2e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NousResearch_Yarn-Solar-10b-64k/1762652579.793644",
- "retrieved_timestamp": "1762652579.7936451",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NousResearch/Yarn-Solar-10b-64k",
- "developer": "NousResearch",
- "inference_platform": "unknown",
- "id": "NousResearch/Yarn-Solar-10b-64k"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1988867316498003
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49219907954226505
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.028700906344410877
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40143750000000006
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3148271276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_ASTAROTH-3.2-1B/e454276c-3113-49f8-9397-9c1ad5e7bcc5.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_ASTAROTH-3.2-1B/e454276c-3113-49f8-9397-9c1ad5e7bcc5.json
deleted file mode 100644
index c140c09c913f41d1d829011beeabb2b02ac6930f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_ASTAROTH-3.2-1B/e454276c-3113-49f8-9397-9c1ad5e7bcc5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Novaciano_ASTAROTH-3.2-1B/1762652579.7938519",
- "retrieved_timestamp": "1762652579.793853",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Novaciano/ASTAROTH-3.2-1B",
- "developer": "Novaciano",
- "inference_platform": "unknown",
- "id": "Novaciano/ASTAROTH-3.2-1B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5612884923115112
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3542962056805596
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07326283987915408
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2558724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31421875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19090757978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.498
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_BLAST_PROCESSING-3.2-1B/61173be4-9a87-4dfa-812d-b414b4d2bccb.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_BLAST_PROCESSING-3.2-1B/61173be4-9a87-4dfa-812d-b414b4d2bccb.json
deleted file mode 100644
index 589a169eb0ce4c24521fbdafd081d8ce95a20958..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_BLAST_PROCESSING-3.2-1B/61173be4-9a87-4dfa-812d-b414b4d2bccb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Novaciano_BLAST_PROCESSING-3.2-1B/1762652579.794129",
- "retrieved_timestamp": "1762652579.7941298",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Novaciano/BLAST_PROCESSING-3.2-1B",
- "developer": "Novaciano",
- "inference_platform": "unknown",
- "id": "Novaciano/BLAST_PROCESSING-3.2-1B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3921783091087204
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3460318843168258
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07477341389728097
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3351458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19414893617021275
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.498
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Cerberus-3.2-1B/2d6ff76b-9d81-45a7-8768-6a240b5395ab.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Cerberus-3.2-1B/2d6ff76b-9d81-45a7-8768-6a240b5395ab.json
deleted file mode 100644
index 6872e2b81e9cad8b44778c9a38da8cdb5e56b46f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Cerberus-3.2-1B/2d6ff76b-9d81-45a7-8768-6a240b5395ab.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Novaciano_Cerberus-3.2-1B/1762652579.7945569",
- "retrieved_timestamp": "1762652579.794559",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Novaciano/Cerberus-3.2-1B",
- "developer": "Novaciano",
- "inference_platform": "unknown",
- "id": "Novaciano/Cerberus-3.2-1B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5016877440746109
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4164937678626939
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0581570996978852
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32888541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1663065159574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.236
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Cultist-3.2-1B/3dc51dce-222f-455b-b61a-04904c7fc855.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Cultist-3.2-1B/3dc51dce-222f-455b-b61a-04904c7fc855.json
deleted file mode 100644
index d05e7d7598ec21e3977d8a5c8a4d398f44884d87..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Cultist-3.2-1B/3dc51dce-222f-455b-b61a-04904c7fc855.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Novaciano_Cultist-3.2-1B/1762652579.7949288",
- "retrieved_timestamp": "1762652579.79493",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Novaciano/Cultist-3.2-1B",
- "developer": "Novaciano",
- "inference_platform": "unknown",
- "id": "Novaciano/Cultist-3.2-1B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5294895322189568
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3399311286410264
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3330104166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17137632978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.498
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_FuseChat-3.2-1B-GRPO_Creative_RP/16a8882c-12f5-46d0-8e1f-88b22aa8f08c.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_FuseChat-3.2-1B-GRPO_Creative_RP/16a8882c-12f5-46d0-8e1f-88b22aa8f08c.json
deleted file mode 100644
index db9cccd9ee19800cb7a6785275b4339fdc7c2acd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_FuseChat-3.2-1B-GRPO_Creative_RP/16a8882c-12f5-46d0-8e1f-88b22aa8f08c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Novaciano_FuseChat-3.2-1B-GRPO_Creative_RP/1762652579.795153",
- "retrieved_timestamp": "1762652579.795153",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP",
- "developer": "Novaciano",
- "inference_platform": "unknown",
- "id": "Novaciano/FuseChat-3.2-1B-GRPO_Creative_RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.559814625194484
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3487816706572648
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08006042296072508
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2558724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33288541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17345412234042554
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.236
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Fusetrix-3.2-1B-GRPO_RP_Creative/7fe4c32b-0bbd-49c0-9e4f-43306457aae8.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Fusetrix-3.2-1B-GRPO_RP_Creative/7fe4c32b-0bbd-49c0-9e4f-43306457aae8.json
deleted file mode 100644
index 5295e689b15da794f06ff2cefd10f0e0968d652d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Fusetrix-3.2-1B-GRPO_RP_Creative/7fe4c32b-0bbd-49c0-9e4f-43306457aae8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Novaciano_Fusetrix-3.2-1B-GRPO_RP_Creative/1762652579.795362",
- "retrieved_timestamp": "1762652579.795362",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative",
- "developer": "Novaciano",
- "inference_platform": "unknown",
- "id": "Novaciano/Fusetrix-3.2-1B-GRPO_RP_Creative"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5366339091388627
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3434595088038714
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1148036253776435
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3209166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17578125
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.236
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_HarmfulProject-3.2-1B/99b31db9-55f8-41c2-9eb9-f21511deccf0.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_HarmfulProject-3.2-1B/99b31db9-55f8-41c2-9eb9-f21511deccf0.json
deleted file mode 100644
index 4fd9b12359e8890a13494b7212bebbc009c1fc20..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_HarmfulProject-3.2-1B/99b31db9-55f8-41c2-9eb9-f21511deccf0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Novaciano_HarmfulProject-3.2-1B/1762652579.7958348",
- "retrieved_timestamp": "1762652579.795836",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Novaciano/HarmfulProject-3.2-1B",
- "developer": "Novaciano",
- "inference_platform": "unknown",
- "id": "Novaciano/HarmfulProject-3.2-1B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3873821460391761
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32744993658117816
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04758308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.341875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18226396276595744
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.498
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_LEWD-Mental-Cultist-3.2-1B/1bce579e-9fac-46a9-92ef-48080832abbb.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_LEWD-Mental-Cultist-3.2-1B/1bce579e-9fac-46a9-92ef-48080832abbb.json
deleted file mode 100644
index c61fb8125476e9c61af6974f78011613a7799350..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_LEWD-Mental-Cultist-3.2-1B/1bce579e-9fac-46a9-92ef-48080832abbb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Novaciano_LEWD-Mental-Cultist-3.2-1B/1762652579.796045",
- "retrieved_timestamp": "1762652579.796046",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Novaciano/LEWD-Mental-Cultist-3.2-1B",
- "developer": "Novaciano",
- "inference_platform": "unknown",
- "id": "Novaciano/LEWD-Mental-Cultist-3.2-1B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5308636639671627
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35127188813594756
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.052870090634441085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25671140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32228125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1768617021276596
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.498
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_La_Mejor_Mezcla-3.2-1B/49fef1c9-bf18-465c-acdb-b8f17e93dbad.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_La_Mejor_Mezcla-3.2-1B/49fef1c9-bf18-465c-acdb-b8f17e93dbad.json
deleted file mode 100644
index 18eb35e54ab0e2430b61cababea653e6c16e743c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_La_Mejor_Mezcla-3.2-1B/49fef1c9-bf18-465c-acdb-b8f17e93dbad.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Novaciano_La_Mejor_Mezcla-3.2-1B/1762652579.79625",
- "retrieved_timestamp": "1762652579.7962508",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Novaciano/La_Mejor_Mezcla-3.2-1B",
- "developer": "Novaciano",
- "inference_platform": "unknown",
- "id": "Novaciano/La_Mejor_Mezcla-3.2-1B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5509969104199081
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34879364478381225
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08987915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18292885638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.498
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Sigil-Of-Satan-3.2-1B/ae9ceba0-8e8a-431f-a762-7bb6c55b4757.json b/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Sigil-Of-Satan-3.2-1B/ae9ceba0-8e8a-431f-a762-7bb6c55b4757.json
deleted file mode 100644
index 632aaafe6be4e0175a9b4f1c1dddf545698eb81c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Novaciano/Novaciano_Sigil-Of-Satan-3.2-1B/ae9ceba0-8e8a-431f-a762-7bb6c55b4757.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Novaciano_Sigil-Of-Satan-3.2-1B/1762652579.7964501",
- "retrieved_timestamp": "1762652579.7964501",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Novaciano/Sigil-Of-Satan-3.2-1B",
- "developer": "Novaciano",
- "inference_platform": "unknown",
- "id": "Novaciano/Sigil-Of-Satan-3.2-1B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5494233079340594
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3545862332731657
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.054380664652567974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3276145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18550531914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.498
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/NucleusAI/NucleusAI_nucleus-22B-token-500B/f18c51de-f5eb-4986-8c44-35bd71db5e8b.json b/leaderboard_data/HFOpenLLMv2/NucleusAI/NucleusAI_nucleus-22B-token-500B/f18c51de-f5eb-4986-8c44-35bd71db5e8b.json
deleted file mode 100644
index 6c331a0c72a8f15a04fa5fe485e8e0b20087e7ab..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/NucleusAI/NucleusAI_nucleus-22B-token-500B/f18c51de-f5eb-4986-8c44-35bd71db5e8b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NucleusAI_nucleus-22B-token-500B/1762652579.7966561",
- "retrieved_timestamp": "1762652579.7966561",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NucleusAI/nucleus-22B-token-500B",
- "developer": "NucleusAI",
- "inference_platform": "unknown",
- "id": "NucleusAI/nucleus-22B-token-500B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.025654153202391873
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29198007801214715
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3510520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11619015957446809
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 21.828
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI-15B/4ffdc303-b5e4-45f0-839c-432f04dc5d57.json b/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI-15B/4ffdc303-b5e4-45f0-839c-432f04dc5d57.json
deleted file mode 100644
index e81c64c73140a3358ada28c1821c33ba55420f6f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI-15B/4ffdc303-b5e4-45f0-839c-432f04dc5d57.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI-15B/1762652579.797408",
- "retrieved_timestamp": "1762652579.797409",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OEvortex/HelpingAI-15B",
- "developer": "OEvortex",
- "inference_platform": "unknown",
- "id": "OEvortex/HelpingAI-15B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2030091268944179
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936006977853758
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.361875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11112034574468085
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 15.323
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI-3B-reloaded/628026b2-efc1-4592-a85b-f5d2ea1dc1dd.json b/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI-3B-reloaded/628026b2-efc1-4592-a85b-f5d2ea1dc1dd.json
deleted file mode 100644
index 4e5348434a95436be5ca4db422e5d5d6cf4a9419..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI-3B-reloaded/628026b2-efc1-4592-a85b-f5d2ea1dc1dd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI-3B-reloaded/1762652579.797647",
- "retrieved_timestamp": "1762652579.797647",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OEvortex/HelpingAI-3B-reloaded",
- "developer": "OEvortex",
- "inference_platform": "unknown",
- "id": "OEvortex/HelpingAI-3B-reloaded"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46466819150963884
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4128512897904065
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.013595166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3524479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25947473404255317
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 2.81
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI2-9B/d04d6474-5784-4492-8347-a2bc03eca6ba.json b/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI2-9B/d04d6474-5784-4492-8347-a2bc03eca6ba.json
deleted file mode 100644
index 74afc67f4e08782cef751deecb0b4dcc6bc2fee8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI2-9B/d04d6474-5784-4492-8347-a2bc03eca6ba.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI2-9B/1762652579.797843",
- "retrieved_timestamp": "1762652579.797844",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OEvortex/HelpingAI2-9B",
- "developer": "OEvortex",
- "inference_platform": "unknown",
- "id": "OEvortex/HelpingAI2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44131238447319776
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4844617641983123
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3710833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28997672872340424
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.903
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI2.5-10B/6a41fcba-f13d-4839-8a91-ff3f18de5114.json b/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI2.5-10B/6a41fcba-f13d-4839-8a91-ff3f18de5114.json
deleted file mode 100644
index 1cffa2224801c090fdd40153b39c61dacd724f60..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OEvortex/OEvortex_HelpingAI2.5-10B/6a41fcba-f13d-4839-8a91-ff3f18de5114.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OEvortex_HelpingAI2.5-10B/1762652579.798051",
- "retrieved_timestamp": "1762652579.798051",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OEvortex/HelpingAI2.5-10B",
- "developer": "OEvortex",
- "inference_platform": "unknown",
- "id": "OEvortex/HelpingAI2.5-10B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32765617450586665
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4495657491171711
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02039274924471299
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37381250000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25748005319148937
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.211
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OliveiraJLT/OliveiraJLT_Sagui-7B-Instruct-v0.1/d5135349-0757-469d-8ad3-80ef56d1f7de.json b/leaderboard_data/HFOpenLLMv2/OliveiraJLT/OliveiraJLT_Sagui-7B-Instruct-v0.1/d5135349-0757-469d-8ad3-80ef56d1f7de.json
deleted file mode 100644
index ac43c436e04615cc1dae9e95d836c2ab9cf8976b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OliveiraJLT/OliveiraJLT_Sagui-7B-Instruct-v0.1/d5135349-0757-469d-8ad3-80ef56d1f7de.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OliveiraJLT_Sagui-7B-Instruct-v0.1/1762652579.798249",
- "retrieved_timestamp": "1762652579.798249",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OliveiraJLT/Sagui-7B-Instruct-v0.1",
- "developer": "OliveiraJLT",
- "inference_platform": "unknown",
- "id": "OliveiraJLT/Sagui-7B-Instruct-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28916275482386733
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3110678914743868
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015105740181268883
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2424496644295302
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4190520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14852061170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.738
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Omkar1102/Omkar1102_code-yi/2609af14-3cff-4b19-9741-e1caca56f58a.json b/leaderboard_data/HFOpenLLMv2/Omkar1102/Omkar1102_code-yi/2609af14-3cff-4b19-9741-e1caca56f58a.json
deleted file mode 100644
index 7209bd367e9b72e9241a7eec999f63cfbc1a6b03..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Omkar1102/Omkar1102_code-yi/2609af14-3cff-4b19-9741-e1caca56f58a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Omkar1102_code-yi/1762652579.79849",
- "retrieved_timestamp": "1762652579.7984908",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Omkar1102/code-yi",
- "developer": "Omkar1102",
- "inference_platform": "unknown",
- "id": "Omkar1102/code-yi"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21477457590304835
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2760062695877461
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25083892617449666
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3802291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11261635638297872
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 2.084
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Omkar1102/Omkar1102_code-yi/3edef2ec-9fad-45ba-8fde-4af5c4f24d69.json b/leaderboard_data/HFOpenLLMv2/Omkar1102/Omkar1102_code-yi/3edef2ec-9fad-45ba-8fde-4af5c4f24d69.json
deleted file mode 100644
index e86f6a4351154a8040b0b46baba330a82a06168c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Omkar1102/Omkar1102_code-yi/3edef2ec-9fad-45ba-8fde-4af5c4f24d69.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Omkar1102_code-yi/1762652579.798722",
- "retrieved_timestamp": "1762652579.798723",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Omkar1102/code-yi",
- "developer": "Omkar1102",
- "inference_platform": "unknown",
- "id": "Omkar1102/code-yi"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2254407195131141
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2750025242693941
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3761979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11228390957446809
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 2.084
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OmnicromsBrain/OmnicromsBrain_NeuralStar_FusionWriter_4x7b/65ba6556-712c-42cc-817b-ad8c2014dc4c.json b/leaderboard_data/HFOpenLLMv2/OmnicromsBrain/OmnicromsBrain_NeuralStar_FusionWriter_4x7b/65ba6556-712c-42cc-817b-ad8c2014dc4c.json
deleted file mode 100644
index c0fa68afa9390e770f49da08d683246f96e7f868..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OmnicromsBrain/OmnicromsBrain_NeuralStar_FusionWriter_4x7b/65ba6556-712c-42cc-817b-ad8c2014dc4c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OmnicromsBrain_NeuralStar_FusionWriter_4x7b/1762652579.7988968",
- "retrieved_timestamp": "1762652579.798898",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OmnicromsBrain/NeuralStar_FusionWriter_4x7b",
- "developer": "OmnicromsBrain",
- "inference_platform": "unknown",
- "id": "OmnicromsBrain/NeuralStar_FusionWriter_4x7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5963842604289951
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47762434766958123
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04909365558912387
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.401875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2605551861702128
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 24.154
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OnlyCheeini/OnlyCheeini_greesychat-turbo/f3a7f01c-2893-4887-a210-d126d9135edf.json b/leaderboard_data/HFOpenLLMv2/OnlyCheeini/OnlyCheeini_greesychat-turbo/f3a7f01c-2893-4887-a210-d126d9135edf.json
deleted file mode 100644
index 1d25216ee37ac21e2dd445f35f0643dbe71c0656..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OnlyCheeini/OnlyCheeini_greesychat-turbo/f3a7f01c-2893-4887-a210-d126d9135edf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OnlyCheeini_greesychat-turbo/1762652579.7991328",
- "retrieved_timestamp": "1762652579.799134",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OnlyCheeini/greesychat-turbo",
- "developer": "OnlyCheeini",
- "inference_platform": "unknown",
- "id": "OnlyCheeini/greesychat-turbo"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.023256071667619692
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30921339082318816
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3314270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11377992021276596
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OpenAssistant/OpenAssistant_oasst-sft-1-pythia-12b/ba1129fd-f158-47ad-b194-7cff794b9ef2.json b/leaderboard_data/HFOpenLLMv2/OpenAssistant/OpenAssistant_oasst-sft-1-pythia-12b/ba1129fd-f158-47ad-b194-7cff794b9ef2.json
deleted file mode 100644
index d7f561ee390bf8ffc73929b15884127e31e607b8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OpenAssistant/OpenAssistant_oasst-sft-1-pythia-12b/ba1129fd-f158-47ad-b194-7cff794b9ef2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OpenAssistant_oasst-sft-1-pythia-12b/1762652579.799746",
- "retrieved_timestamp": "1762652579.799747",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OpenAssistant/oasst-sft-1-pythia-12b",
- "developer": "OpenAssistant",
- "inference_platform": "unknown",
- "id": "OpenAssistant/oasst-sft-1-pythia-12b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10553885911603435
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.314662875941371
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015105740181268883
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33269791666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11128656914893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GPTNeoXForCausalLM",
- "params_billions": 12.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-falcon3-10b-v24.2-131k/19bba814-812c-49c2-acf1-9d056fd7d62d.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-falcon3-10b-v24.2-131k/19bba814-812c-49c2-acf1-9d056fd7d62d.json
deleted file mode 100644
index 887166c821026052354feaabd50ef8e88d54b511..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-falcon3-10b-v24.2-131k/19bba814-812c-49c2-acf1-9d056fd7d62d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-falcon3-10b-v24.2-131k/1762652579.800029",
- "retrieved_timestamp": "1762652579.80003",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OpenBuddy/openbuddy-falcon3-10b-v24.2-131k",
- "developer": "OpenBuddy",
- "inference_platform": "unknown",
- "id": "OpenBuddy/openbuddy-falcon3-10b-v24.2-131k"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5086315420861093
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6003725722032135
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21299093655589124
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41864583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3833942819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.34
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-mixtral-7bx8-v18.1-32k/247ee47c-e441-4020-97e3-14e3ed8d22c9.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-mixtral-7bx8-v18.1-32k/247ee47c-e441-4020-97e3-14e3ed8d22c9.json
deleted file mode 100644
index 6a881da486e3f69b2761e0ef343e3369ec721464..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-mixtral-7bx8-v18.1-32k/247ee47c-e441-4020-97e3-14e3ed8d22c9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-mixtral-7bx8-v18.1-32k/1762652579.803262",
- "retrieved_timestamp": "1762652579.803263",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k",
- "developer": "OpenBuddy",
- "inference_platform": "unknown",
- "id": "OpenBuddy/openbuddy-mixtral-7bx8-v18.1-32k"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.549347952322061
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46561770563515265
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10800604229607251
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3830520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38040226063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 46.741
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-nemotron-70b-v23.1-131k/e4e4d8f4-7e49-4b08-8a08-97e4e2c28616.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-nemotron-70b-v23.1-131k/e4e4d8f4-7e49-4b08-8a08-97e4e2c28616.json
deleted file mode 100644
index 304c931c9930b73cea826e3d5d8c2620610f8f43..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-nemotron-70b-v23.1-131k/e4e4d8f4-7e49-4b08-8a08-97e4e2c28616.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-nemotron-70b-v23.1-131k/1762652579.803536",
- "retrieved_timestamp": "1762652579.803537",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OpenBuddy/openbuddy-nemotron-70b-v23.1-131k",
- "developer": "OpenBuddy",
- "inference_platform": "unknown",
- "id": "OpenBuddy/openbuddy-nemotron-70b-v23.1-131k"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7555275557742346
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6749472828128272
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32099697885196377
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36325503355704697
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45375000000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5174534574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-nemotron-70b-v23.2-131k/b34ca7d7-6049-4f4f-a2e3-db736009fa4d.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-nemotron-70b-v23.2-131k/b34ca7d7-6049-4f4f-a2e3-db736009fa4d.json
deleted file mode 100644
index 62840bf59d9cdbfc3eae4c037d6b5f15f9229da9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-nemotron-70b-v23.2-131k/b34ca7d7-6049-4f4f-a2e3-db736009fa4d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-nemotron-70b-v23.2-131k/1762652579.803802",
- "retrieved_timestamp": "1762652579.803806",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OpenBuddy/openbuddy-nemotron-70b-v23.2-131k",
- "developer": "OpenBuddy",
- "inference_platform": "unknown",
- "id": "OpenBuddy/openbuddy-nemotron-70b-v23.2-131k"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7226547782107031
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6704805157570325
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3157099697885196
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3598993288590604
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46959375000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5120511968085106
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-qwq-32b-v24.1-200k/a2b990cd-e692-44fc-8b39-ac91eab85cef.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-qwq-32b-v24.1-200k/a2b990cd-e692-44fc-8b39-ac91eab85cef.json
deleted file mode 100644
index 0ce31864bbd209f6ca3695470f8673cd8a56e7bf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-qwq-32b-v24.1-200k/a2b990cd-e692-44fc-8b39-ac91eab85cef.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwq-32b-v24.1-200k/1762652579.804893",
- "retrieved_timestamp": "1762652579.804894",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OpenBuddy/openbuddy-qwq-32b-v24.1-200k",
- "developer": "OpenBuddy",
- "inference_platform": "unknown",
- "id": "OpenBuddy/openbuddy-qwq-32b-v24.1-200k"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.593661484860171
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6798496773637743
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37386706948640486
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.484875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5490359042553191
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-qwq-32b-v24.2-200k/24684939-5eb8-40b1-99dd-1ebe693680fc.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-qwq-32b-v24.2-200k/24684939-5eb8-40b1-99dd-1ebe693680fc.json
deleted file mode 100644
index b7a7e5769990c4d34db5e74d76013f8783bea953..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-qwq-32b-v24.2-200k/24684939-5eb8-40b1-99dd-1ebe693680fc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-qwq-32b-v24.2-200k/1762652579.8051221",
- "retrieved_timestamp": "1762652579.8051221",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OpenBuddy/openbuddy-qwq-32b-v24.2-200k",
- "developer": "OpenBuddy",
- "inference_platform": "unknown",
- "id": "OpenBuddy/openbuddy-qwq-32b-v24.2-200k"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5969837808126881
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6771537576509328
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3776435045317221
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3766778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47179166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5446309840425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-yi1.5-34b-v21.3-32k/f6a36220-0b31-4b0d-9262-7e0e508e64db.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-yi1.5-34b-v21.3-32k/f6a36220-0b31-4b0d-9262-7e0e508e64db.json
deleted file mode 100644
index 8374c4fcf570ad340dbca360b344e1b62af33ac4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-yi1.5-34b-v21.3-32k/f6a36220-0b31-4b0d-9262-7e0e508e64db.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-yi1.5-34b-v21.3-32k/1762652579.8053398",
- "retrieved_timestamp": "1762652579.805341",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k",
- "developer": "OpenBuddy",
- "inference_platform": "unknown",
- "id": "OpenBuddy/openbuddy-yi1.5-34b-v21.3-32k"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5420041046645123
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6162574860411373
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1782477341389728
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.348993288590604
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44394791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4599401595744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.407
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-zero-14b-v22.3-32k/0e288116-902d-4fef-9020-a3a4dc80e698.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-zero-14b-v22.3-32k/0e288116-902d-4fef-9020-a3a4dc80e698.json
deleted file mode 100644
index 532f084c45e32a4e9ca0453b359b04dc3252ba2c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-zero-14b-v22.3-32k/0e288116-902d-4fef-9020-a3a4dc80e698.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-zero-14b-v22.3-32k/1762652579.805548",
- "retrieved_timestamp": "1762652579.8055491",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OpenBuddy/openbuddy-zero-14b-v22.3-32k",
- "developer": "OpenBuddy",
- "inference_platform": "unknown",
- "id": "OpenBuddy/openbuddy-zero-14b-v22.3-32k"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37529200299649373
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4859759816473639
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09365558912386707
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41660416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3187333776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.022
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-zero-3b-v21.2-32k/9d135662-43d6-4b05-90cb-5d2c856b0b89.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-zero-3b-v21.2-32k/9d135662-43d6-4b05-90cb-5d2c856b0b89.json
deleted file mode 100644
index 22cd165be67f79fc8d5056dd63fad221ebe1b262..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-zero-3b-v21.2-32k/9d135662-43d6-4b05-90cb-5d2c856b0b89.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-zero-3b-v21.2-32k/1762652579.8057752",
- "retrieved_timestamp": "1762652579.8057752",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OpenBuddy/openbuddy-zero-3b-v21.2-32k",
- "developer": "OpenBuddy",
- "inference_platform": "unknown",
- "id": "OpenBuddy/openbuddy-zero-3b-v21.2-32k"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3802377691192702
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3934791831798414
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0188821752265861
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3566354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20337433510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 4.769
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-zero-56b-v21.2-32k/7636a893-1404-4257-9778-653f3cfb601b.json b/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-zero-56b-v21.2-32k/7636a893-1404-4257-9778-653f3cfb601b.json
deleted file mode 100644
index 2e4286ec7cdc9d408f1a6bc0ded75d3d2988638f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OpenBuddy/OpenBuddy_openbuddy-zero-56b-v21.2-32k/7636a893-1404-4257-9778-653f3cfb601b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OpenBuddy_openbuddy-zero-56b-v21.2-32k/1762652579.8059928",
- "retrieved_timestamp": "1762652579.805994",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OpenBuddy/openbuddy-zero-56b-v21.2-32k",
- "developer": "OpenBuddy",
- "inference_platform": "unknown",
- "id": "OpenBuddy/openbuddy-zero-56b-v21.2-32k"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5057092957796425
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6128345897750148
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16238670694864046
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179530201342282
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4305208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43991023936170215
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 56.707
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OpenGenerativeAI/OpenGenerativeAI_Bifrost-14B/cde00174-ac52-42da-9641-0866739232e4.json b/leaderboard_data/HFOpenLLMv2/OpenGenerativeAI/OpenGenerativeAI_Bifrost-14B/cde00174-ac52-42da-9641-0866739232e4.json
deleted file mode 100644
index 9428cbae554bba346791fb2957d0defda55d4907..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OpenGenerativeAI/OpenGenerativeAI_Bifrost-14B/cde00174-ac52-42da-9641-0866739232e4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OpenGenerativeAI_Bifrost-14B/1762652579.806474",
- "retrieved_timestamp": "1762652579.806475",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OpenGenerativeAI/Bifrost-14B",
- "developer": "OpenGenerativeAI",
- "inference_platform": "unknown",
- "id": "OpenGenerativeAI/Bifrost-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6615302951723648
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6844897889249308
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23564954682779457
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37919463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46239583333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5073969414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OpenGenerativeAI/OpenGenerativeAI_Bifrost/cef8e01a-071d-4ee4-997b-44679ef5b56e.json b/leaderboard_data/HFOpenLLMv2/OpenGenerativeAI/OpenGenerativeAI_Bifrost/cef8e01a-071d-4ee4-997b-44679ef5b56e.json
deleted file mode 100644
index 3ec1da6089cf7a4d38c6559c60137beb65bd4fa4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OpenGenerativeAI/OpenGenerativeAI_Bifrost/cef8e01a-071d-4ee4-997b-44679ef5b56e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OpenGenerativeAI_Bifrost/1762652579.8062131",
- "retrieved_timestamp": "1762652579.8062139",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OpenGenerativeAI/Bifrost",
- "developer": "OpenGenerativeAI",
- "inference_platform": "unknown",
- "id": "OpenGenerativeAI/Bifrost"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6347524568145853
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6849273974523276
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2545317220543807
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36828859060402686
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45976041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5159574468085106
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B-Instruct-human-data/26787f2b-8f30-4cc8-b39e-447b8c53aa85.json b/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B-Instruct-human-data/26787f2b-8f30-4cc8-b39e-447b8c53aa85.json
deleted file mode 100644
index 6770c161de940ac64c5f75657a96077750b9f66c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B-Instruct-human-data/26787f2b-8f30-4cc8-b39e-447b8c53aa85.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B-Instruct-human-data/1762652579.8072178",
- "retrieved_timestamp": "1762652579.807219",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OpenLLM-France/Lucie-7B-Instruct-human-data",
- "developer": "OpenLLM-France",
- "inference_platform": "unknown",
- "id": "OpenLLM-France/Lucie-7B-Instruct-human-data"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29460830596151544
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32842533479733
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02190332326283988
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37285416666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14295212765957446
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.707
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B-Instruct-v1.1/e94a0550-93fa-448a-a4a4-187fd1b7d24e.json b/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B-Instruct-v1.1/e94a0550-93fa-448a-a4a4-187fd1b7d24e.json
deleted file mode 100644
index e185a959fa4bb3ef2bb6445df05c7048afe34f68..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B-Instruct-v1.1/e94a0550-93fa-448a-a4a4-187fd1b7d24e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B-Instruct-v1.1/1762652579.807442",
- "retrieved_timestamp": "1762652579.807442",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OpenLLM-France/Lucie-7B-Instruct-v1.1",
- "developer": "OpenLLM-France",
- "inference_platform": "unknown",
- "id": "OpenLLM-France/Lucie-7B-Instruct-v1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3038759380665523
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38158765227444885
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03172205438066465
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28187919463087246
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37502083333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1864195478723404
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.707
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B-Instruct/af17be77-0ae3-4b90-ba85-a4886450cd43.json b/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B-Instruct/af17be77-0ae3-4b90-ba85-a4886450cd43.json
deleted file mode 100644
index ad2f63b72698151bdb18e82828088d3941f55760..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B-Instruct/af17be77-0ae3-4b90-ba85-a4886450cd43.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B-Instruct/1762652579.806944",
- "retrieved_timestamp": "1762652579.806945",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OpenLLM-France/Lucie-7B-Instruct",
- "developer": "OpenLLM-France",
- "inference_platform": "unknown",
- "id": "OpenLLM-France/Lucie-7B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.279645784296777
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3254036581260458
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01661631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36621875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15558510638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.707
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B/01e4cd19-4f1f-4c30-b80f-e1d287d5d7c2.json b/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B/01e4cd19-4f1f-4c30-b80f-e1d287d5d7c2.json
deleted file mode 100644
index 15a48f9e0d38b275c7d891d32bf5fb1e12df15ee..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/OpenLLM-France/OpenLLM-France_Lucie-7B/01e4cd19-4f1f-4c30-b80f-e1d287d5d7c2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/OpenLLM-France_Lucie-7B/1762652579.806693",
- "retrieved_timestamp": "1762652579.8066938",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "OpenLLM-France/Lucie-7B",
- "developer": "OpenLLM-France",
- "inference_platform": "unknown",
- "id": "OpenLLM-France/Lucie-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24964538535530173
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3492469872973046
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.014350453172205438
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39232291666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14976728723404256
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.707
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Orion-zhen/Orion-zhen_Qwen2.5-7B-Instruct-Uncensored/141239bb-c7e3-4c38-b289-12cd59f592d2.json b/leaderboard_data/HFOpenLLMv2/Orion-zhen/Orion-zhen_Qwen2.5-7B-Instruct-Uncensored/141239bb-c7e3-4c38-b289-12cd59f592d2.json
deleted file mode 100644
index 88d90a83011223ec30fb304c31b2dc8af853e8c8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Orion-zhen/Orion-zhen_Qwen2.5-7B-Instruct-Uncensored/141239bb-c7e3-4c38-b289-12cd59f592d2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Orion-zhen_Qwen2.5-7B-Instruct-Uncensored/1762652579.808624",
- "retrieved_timestamp": "1762652579.808625",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored",
- "developer": "Orion-zhen",
- "inference_platform": "unknown",
- "id": "Orion-zhen/Qwen2.5-7B-Instruct-Uncensored"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7204317876567508
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5473918652157296
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4773413897280967
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43613541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4426529255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/P0x0/P0x0_Astra-v1-12B/349ae5f5-55d0-4486-a6dc-2b5644fac045.json b/leaderboard_data/HFOpenLLMv2/P0x0/P0x0_Astra-v1-12B/349ae5f5-55d0-4486-a6dc-2b5644fac045.json
deleted file mode 100644
index 6df3f43b931ab75b4aabd09bcd46544ba64291b1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/P0x0/P0x0_Astra-v1-12B/349ae5f5-55d0-4486-a6dc-2b5644fac045.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/P0x0_Astra-v1-12B/1762652579.8091059",
- "retrieved_timestamp": "1762652579.8091059",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "P0x0/Astra-v1-12B",
- "developer": "P0x0",
- "inference_platform": "unknown",
- "id": "P0x0/Astra-v1-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28059437847134494
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5214506484138984
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11329305135951662
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4051875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3460771276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_L3.2-Instruct-Thinking-v0.1-1B/3c942d2f-0b53-498e-ab05-71d5075cb974.json b/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_L3.2-Instruct-Thinking-v0.1-1B/3c942d2f-0b53-498e-ab05-71d5075cb974.json
deleted file mode 100644
index 60be403926202b4e22ae0ca1438a53af85bac341..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_L3.2-Instruct-Thinking-v0.1-1B/3c942d2f-0b53-498e-ab05-71d5075cb974.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PJMixers-Dev_L3.2-Instruct-Thinking-v0.1-1B/1762652579.8095942",
- "retrieved_timestamp": "1762652579.8095949",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B",
- "developer": "PJMixers-Dev",
- "inference_platform": "unknown",
- "id": "PJMixers-Dev/L3.2-Instruct-Thinking-v0.1-1B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46276989498973836
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33018063718974094
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.054380664652567974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32621875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14827127659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.236
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/fb66b283-bfd6-4437-95b7-d74a0d8d2814.json b/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/fb66b283-bfd6-4437-95b7-d74a0d8d2814.json
deleted file mode 100644
index bf7148a99291f94e3c438830f4edd551e7bed424..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/fb66b283-bfd6-4437-95b7-d74a0d8d2814.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.1-Instruct-Interleaved-Zeroed-13B/1762652579.809847",
- "retrieved_timestamp": "1762652579.809848",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B",
- "developer": "PJMixers-Dev",
- "inference_platform": "unknown",
- "id": "PJMixers-Dev/LLaMa-3.1-Instruct-Interleaved-Zeroed-13B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7871015572015585
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5073267838961463
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2001510574018127
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3869895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3767453457446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 13.047
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/1d91cdce-0bdb-4567-9296-6225db3aa0bc.json b/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/1d91cdce-0bdb-4567-9296-6225db3aa0bc.json
deleted file mode 100644
index 4e906300c92f39433ff936340ef8e34d6ccb52cc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/1d91cdce-0bdb-4567-9296-6225db3aa0bc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B/1762652579.8105159",
- "retrieved_timestamp": "1762652579.810517",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B",
- "developer": "PJMixers-Dev",
- "inference_platform": "unknown",
- "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.1-SFT-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.693054428915278
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4556166737589294
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1216012084592145
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37003125000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.312749335106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/d1875dfd-05ab-4a49-8c7f-02cddf35a695.json b/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/d1875dfd-05ab-4a49-8c7f-02cddf35a695.json
deleted file mode 100644
index df463114b5036820c8e69b4277c6de1756420faf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/d1875dfd-05ab-4a49-8c7f-02cddf35a695.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B/1762652579.810729",
- "retrieved_timestamp": "1762652579.81073",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B",
- "developer": "PJMixers-Dev",
- "inference_platform": "unknown",
- "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6291573026237051
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45814952191015346
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1299093655589124
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.365875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3115026595744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/62b12d95-1da2-407c-8552-8c5e951c5c85.json b/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/62b12d95-1da2-407c-8552-8c5e951c5c85.json
deleted file mode 100644
index 8461c682266fd5e68aa95b7a907ab862e475f479..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/62b12d95-1da2-407c-8552-8c5e951c5c85.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B/1762652579.8109388",
- "retrieved_timestamp": "1762652579.8109398",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B",
- "developer": "PJMixers-Dev",
- "inference_platform": "unknown",
- "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMix-v0.2-SFT-HailMary-v0.1-KTO-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6503898544750152
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45107942950222196
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12613293051359517
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3687291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3107546542553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/56f36430-4bb1-425d-ac4b-30d85237667c.json b/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/56f36430-4bb1-425d-ac4b-30d85237667c.json
deleted file mode 100644
index 22cc4533d19cbac1f35c0a07d4bf3becf0920f16..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/PJMixers-Dev/PJMixers-Dev_LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/56f36430-4bb1-425d-ac4b-30d85237667c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PJMixers-Dev_LLaMa-3.2-Instruct-JankMixBread-v0.1-3B/1762652579.8111491",
- "retrieved_timestamp": "1762652579.81115",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B",
- "developer": "PJMixers-Dev",
- "inference_platform": "unknown",
- "id": "PJMixers-Dev/LLaMa-3.2-Instruct-JankMixBread-v0.1-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5040858256093831
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4483158594793648
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13066465256797583
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3515520833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.308344414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Parissa3/Parissa3_test-model/53cb44c7-f7bc-40fa-88e7-511b9dfab004.json b/leaderboard_data/HFOpenLLMv2/Parissa3/Parissa3_test-model/53cb44c7-f7bc-40fa-88e7-511b9dfab004.json
deleted file mode 100644
index f200d621a8958f8ddb3aea03d67bf6095596d2a7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Parissa3/Parissa3_test-model/53cb44c7-f7bc-40fa-88e7-511b9dfab004.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Parissa3_test-model/1762652579.811859",
- "retrieved_timestamp": "1762652579.81186",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Parissa3/test-model",
- "developer": "Parissa3",
- "inference_platform": "unknown",
- "id": "Parissa3/test-model"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3882564927725103
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5193916761801759
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0649546827794562
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46853125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3056848404255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Pinkstack/Pinkstack_SuperThoughts-CoT-14B-16k-o1-QwQ/c604f0fb-517d-45db-9e1c-6c911bce43e7.json b/leaderboard_data/HFOpenLLMv2/Pinkstack/Pinkstack_SuperThoughts-CoT-14B-16k-o1-QwQ/c604f0fb-517d-45db-9e1c-6c911bce43e7.json
deleted file mode 100644
index 6b1077cfc1a27c367c934b6d3caabdfdee75d643..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Pinkstack/Pinkstack_SuperThoughts-CoT-14B-16k-o1-QwQ/c604f0fb-517d-45db-9e1c-6c911bce43e7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Pinkstack_SuperThoughts-CoT-14B-16k-o1-QwQ/1762652579.812447",
- "retrieved_timestamp": "1762652579.812449",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ",
- "developer": "Pinkstack",
- "inference_platform": "unknown",
- "id": "Pinkstack/SuperThoughts-CoT-14B-16k-o1-QwQ"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.051457909458015844
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6719989821162488
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4199395770392749
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3926174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4913541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.526845079787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Pinkstack/Pinkstack_Superthoughts-lite-1.8B-experimental-o1/fba2ce2f-6c30-4af9-ae3a-d23f39f3f963.json b/leaderboard_data/HFOpenLLMv2/Pinkstack/Pinkstack_Superthoughts-lite-1.8B-experimental-o1/fba2ce2f-6c30-4af9-ae3a-d23f39f3f963.json
deleted file mode 100644
index 429cf3f9aeb80fc81db8fa9086db51a6f80fb368..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Pinkstack/Pinkstack_Superthoughts-lite-1.8B-experimental-o1/fba2ce2f-6c30-4af9-ae3a-d23f39f3f963.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Pinkstack_Superthoughts-lite-1.8B-experimental-o1/1762652579.81273",
- "retrieved_timestamp": "1762652579.81273",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Pinkstack/Superthoughts-lite-1.8B-experimental-o1",
- "developer": "Pinkstack",
- "inference_platform": "unknown",
- "id": "Pinkstack/Superthoughts-lite-1.8B-experimental-o1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0375193375798437
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3434736647957908
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03172205438066465
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33539583333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18508976063829788
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.812
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Pinkstack/Pinkstack_Superthoughts-lite-v1/ff308837-dc35-4257-a4cd-de463feb733e.json b/leaderboard_data/HFOpenLLMv2/Pinkstack/Pinkstack_Superthoughts-lite-v1/ff308837-dc35-4257-a4cd-de463feb733e.json
deleted file mode 100644
index 853df06e6858a6cc43b7e5ac6d6c73d5de2ce043..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Pinkstack/Pinkstack_Superthoughts-lite-v1/ff308837-dc35-4257-a4cd-de463feb733e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Pinkstack_Superthoughts-lite-v1/1762652579.812961",
- "retrieved_timestamp": "1762652579.812962",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Pinkstack/Superthoughts-lite-v1",
- "developer": "Pinkstack",
- "inference_platform": "unknown",
- "id": "Pinkstack/Superthoughts-lite-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1658643510330368
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3465571905256149
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02945619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3671770833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17553191489361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.711
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-Instruct-CoreCurriculum-12b/d8145a39-f1d0-4b6e-958b-a96585eeec9f.json b/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-Instruct-CoreCurriculum-12b/d8145a39-f1d0-4b6e-958b-a96585eeec9f.json
deleted file mode 100644
index 05ae87b104df80c2f5792134db29b0940d1b6a22..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-Instruct-CoreCurriculum-12b/d8145a39-f1d0-4b6e-958b-a96585eeec9f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-Instruct-CoreCurriculum-12b/1762652579.81328",
- "retrieved_timestamp": "1762652579.813282",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PocketDoc/Dans-Instruct-CoreCurriculum-12b",
- "developer": "PocketDoc",
- "inference_platform": "unknown",
- "id": "PocketDoc/Dans-Instruct-CoreCurriculum-12b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21914520139895477
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3788739075240266
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.054380664652567974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4095625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1219248670212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-PersonalityEngine-V1.1.0-12b/c005ab13-1d42-4e28-802e-12438aab35a4.json b/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-PersonalityEngine-V1.1.0-12b/c005ab13-1d42-4e28-802e-12438aab35a4.json
deleted file mode 100644
index c5296ef42f5cbe90845846bd940009cfa54ee7f9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-PersonalityEngine-V1.1.0-12b/c005ab13-1d42-4e28-802e-12438aab35a4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-PersonalityEngine-V1.1.0-12b/1762652579.813654",
- "retrieved_timestamp": "1762652579.8136551",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PocketDoc/Dans-PersonalityEngine-V1.1.0-12b",
- "developer": "PocketDoc",
- "inference_platform": "unknown",
- "id": "PocketDoc/Dans-PersonalityEngine-V1.1.0-12b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7074672978807343
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5361046243199591
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10498489425981873
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45867708333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32621343085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-PersonalityEngine-V1.2.0-24b/38dd1b21-b357-4daf-94b3-c4a28809e56c.json b/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-PersonalityEngine-V1.2.0-24b/38dd1b21-b357-4daf-94b3-c4a28809e56c.json
deleted file mode 100644
index 5a9b6f084f4ca0039a2a77248b80ef81dadd0362..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-PersonalityEngine-V1.2.0-24b/38dd1b21-b357-4daf-94b3-c4a28809e56c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-PersonalityEngine-V1.2.0-24b/1762652579.813962",
- "retrieved_timestamp": "1762652579.813962",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PocketDoc/Dans-PersonalityEngine-V1.2.0-24b",
- "developer": "PocketDoc",
- "inference_platform": "unknown",
- "id": "PocketDoc/Dans-PersonalityEngine-V1.2.0-24b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7886252920029965
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6421213844206719
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24546827794561935
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3187919463087248
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42996875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5025764627659575
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 23.572
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-PersonalityEngine-v1.0.0-8b/f3623b9f-3e3f-4b7b-a9f5-f0a15bf26f48.json b/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-PersonalityEngine-v1.0.0-8b/f3623b9f-3e3f-4b7b-a9f5-f0a15bf26f48.json
deleted file mode 100644
index ae08b2e868ab1975346909b39601a119db23f60d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-PersonalityEngine-v1.0.0-8b/f3623b9f-3e3f-4b7b-a9f5-f0a15bf26f48.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-PersonalityEngine-v1.0.0-8b/1762652579.814201",
- "retrieved_timestamp": "1762652579.814202",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PocketDoc/Dans-PersonalityEngine-v1.0.0-8b",
- "developer": "PocketDoc",
- "inference_platform": "unknown",
- "id": "PocketDoc/Dans-PersonalityEngine-v1.0.0-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.498190357141274
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47325544259149366
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08157099697885196
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35415625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3065159574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-SakuraKaze-V1.0.0-12b/b78ef40e-91b1-401d-9576-1ac2f600b32a.json b/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-SakuraKaze-V1.0.0-12b/b78ef40e-91b1-401d-9576-1ac2f600b32a.json
deleted file mode 100644
index 529f1f5678785beebd9d3a193fcc172e680682ba..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/PocketDoc/PocketDoc_Dans-SakuraKaze-V1.0.0-12b/b78ef40e-91b1-401d-9576-1ac2f600b32a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PocketDoc_Dans-SakuraKaze-V1.0.0-12b/1762652579.81442",
- "retrieved_timestamp": "1762652579.81442",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PocketDoc/Dans-SakuraKaze-V1.0.0-12b",
- "developer": "PocketDoc",
- "inference_platform": "unknown",
- "id": "PocketDoc/Dans-SakuraKaze-V1.0.0-12b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6520133246452745
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5405357251132225
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09290030211480363
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47452083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35596742021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/PowerInfer/PowerInfer_SmallThinker-3B-Preview/6613aff7-8f26-4b74-b08b-37fbd7990e42.json b/leaderboard_data/HFOpenLLMv2/PowerInfer/PowerInfer_SmallThinker-3B-Preview/6613aff7-8f26-4b74-b08b-37fbd7990e42.json
deleted file mode 100644
index d5bbfefc0dc1fa8c6891e69fd6b58ecbdc2596a7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/PowerInfer/PowerInfer_SmallThinker-3B-Preview/6613aff7-8f26-4b74-b08b-37fbd7990e42.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PowerInfer_SmallThinker-3B-Preview/1762652579.814635",
- "retrieved_timestamp": "1762652579.814636",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PowerInfer/SmallThinker-3B-Preview",
- "developer": "PowerInfer",
- "inference_platform": "unknown",
- "id": "PowerInfer/SmallThinker-3B-Preview"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6199650261306666
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4494922016660919
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27794561933534745
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3524791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3017785904255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/PranavHarshan/PranavHarshan_MedNarra-X1/86023703-88e2-4219-b38b-4c871e2ee381.json b/leaderboard_data/HFOpenLLMv2/PranavHarshan/PranavHarshan_MedNarra-X1/86023703-88e2-4219-b38b-4c871e2ee381.json
deleted file mode 100644
index eb55bd3699287f0f9c4121f7990258561865e7ad..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/PranavHarshan/PranavHarshan_MedNarra-X1/86023703-88e2-4219-b38b-4c871e2ee381.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PranavHarshan_MedNarra-X1/1762652579.815135",
- "retrieved_timestamp": "1762652579.815136",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PranavHarshan/MedNarra-X1",
- "developer": "PranavHarshan",
- "inference_platform": "unknown",
- "id": "PranavHarshan/MedNarra-X1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43384331351924005
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46371668179774184
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04380664652567976
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35403125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34308510638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Appended/eca9180f-20d5-4bcd-9a74-e2f69c4ea4ad.json b/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Appended/eca9180f-20d5-4bcd-9a74-e2f69c4ea4ad.json
deleted file mode 100644
index 5768a1c660f423a084bc2dcd24f0d007428bc5ba..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Appended/eca9180f-20d5-4bcd-9a74-e2f69c4ea4ad.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Appended/1762652579.815407",
- "retrieved_timestamp": "1762652579.815407",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended",
- "developer": "Pretergeek",
- "inference_platform": "unknown",
- "id": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Appended"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5960595663949432
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4619637884426022
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07930513595166164
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42540625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3289561170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 10.732
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Interleaved/65d32305-4f23-4041-a107-8625822c1322.json b/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Interleaved/65d32305-4f23-4041-a107-8625822c1322.json
deleted file mode 100644
index 216cca5fee0e6e0cb43c4b6085f39253b900ebe9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Interleaved/65d32305-4f23-4041-a107-8625822c1322.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_10.7B_48Layers-Interleaved/1762652579.81567",
- "retrieved_timestamp": "1762652579.815671",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved",
- "developer": "Pretergeek",
- "inference_platform": "unknown",
- "id": "Pretergeek/OpenChat-3.5-0106_10.7B_48Layers-Interleaved"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5960595663949432
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4619637884426022
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07779456193353475
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42540625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3298703457446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 10.732
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_32K-PoSE/195acbac-1db7-47ed-907f-98e312fc8921.json b/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_32K-PoSE/195acbac-1db7-47ed-907f-98e312fc8921.json
deleted file mode 100644
index 0a862b2d0dd0042e6893184157bb580510053095..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_32K-PoSE/195acbac-1db7-47ed-907f-98e312fc8921.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_32K-PoSE/1762652579.815889",
- "retrieved_timestamp": "1762652579.8158898",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Pretergeek/OpenChat-3.5-0106_32K-PoSE",
- "developer": "Pretergeek",
- "inference_platform": "unknown",
- "id": "Pretergeek/OpenChat-3.5-0106_32K-PoSE"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3968991165662664
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3471309425137119
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.026435045317220542
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42054166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.203125
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Appended/349bccfd-1816-4845-a1b9-2d9f4936adea.json b/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Appended/349bccfd-1816-4845-a1b9-2d9f4936adea.json
deleted file mode 100644
index 4fde5126f3534bb98e4051c5c5df9cd67484b7a5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Appended/349bccfd-1816-4845-a1b9-2d9f4936adea.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Appended/1762652579.8160908",
- "retrieved_timestamp": "1762652579.8160908",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended",
- "developer": "Pretergeek",
- "inference_platform": "unknown",
- "id": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Appended"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5975833011963811
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4619637884426022
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07930513595166164
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42540625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3289561170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 8.114
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Interleaved/c2e26b8a-3a12-4cb8-888e-96affc8cbac9.json b/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Interleaved/c2e26b8a-3a12-4cb8-888e-96affc8cbac9.json
deleted file mode 100644
index 71666bfdd876c353b932ca60ed5fe0e0ebe2b5cf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Interleaved/c2e26b8a-3a12-4cb8-888e-96affc8cbac9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.11B_36Layers-Interleaved/1762652579.8163",
- "retrieved_timestamp": "1762652579.8163",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved",
- "developer": "Pretergeek",
- "inference_platform": "unknown",
- "id": "Pretergeek/OpenChat-3.5-0106_8.11B_36Layers-Interleaved"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5960595663949432
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46213045510926887
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07779456193353475
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42407291666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3298703457446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 8.114
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Appended/a70222dc-0589-4f09-ac8c-3ff4fa72328f.json b/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Appended/a70222dc-0589-4f09-ac8c-3ff4fa72328f.json
deleted file mode 100644
index bff4077605cdc0dfc5e05d40dbab3c4292c35253..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Appended/a70222dc-0589-4f09-ac8c-3ff4fa72328f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Appended/1762652579.81651",
- "retrieved_timestamp": "1762652579.816511",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended",
- "developer": "Pretergeek",
- "inference_platform": "unknown",
- "id": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Appended"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5960595663949432
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4619637884426022
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07930513595166164
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42540625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3289561170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 8.987
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Interleaved/19eb8f3a-ca9d-4da4-8e7e-96eebfd33576.json b/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Interleaved/19eb8f3a-ca9d-4da4-8e7e-96eebfd33576.json
deleted file mode 100644
index 28b86b5201c93aef4d63e94d29036ef57fa769eb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Interleaved/19eb8f3a-ca9d-4da4-8e7e-96eebfd33576.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_8.99B_40Layers-Interleaved/1762652579.816719",
- "retrieved_timestamp": "1762652579.816719",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved",
- "developer": "Pretergeek",
- "inference_platform": "unknown",
- "id": "Pretergeek/OpenChat-3.5-0106_8.99B_40Layers-Interleaved"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5975833011963811
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46213045510926887
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07779456193353475
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42407291666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3298703457446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 8.987
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_9.86B_44Layers-Appended/e44eddb9-9764-4bc9-be85-ec7995846da0.json b/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_9.86B_44Layers-Appended/e44eddb9-9764-4bc9-be85-ec7995846da0.json
deleted file mode 100644
index c95aa6009361fa7d6ecb14cdb27ca0103d104556..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Pretergeek/Pretergeek_OpenChat-3.5-0106_9.86B_44Layers-Appended/e44eddb9-9764-4bc9-be85-ec7995846da0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Pretergeek_OpenChat-3.5-0106_9.86B_44Layers-Appended/1762652579.816936",
- "retrieved_timestamp": "1762652579.816937",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended",
- "developer": "Pretergeek",
- "inference_platform": "unknown",
- "id": "Pretergeek/OpenChat-3.5-0106_9.86B_44Layers-Appended"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5960595663949432
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4619637884426022
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07930513595166164
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42540625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3289561170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 9.859
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/PrimeIntellect/PrimeIntellect_INTELLECT-1-Instruct/ea823c15-3c92-4a67-a4fd-7826a9dd9e41.json b/leaderboard_data/HFOpenLLMv2/PrimeIntellect/PrimeIntellect_INTELLECT-1-Instruct/ea823c15-3c92-4a67-a4fd-7826a9dd9e41.json
deleted file mode 100644
index 96d677d008e53429be110bc95c2d4ee8c6837991..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/PrimeIntellect/PrimeIntellect_INTELLECT-1-Instruct/ea823c15-3c92-4a67-a4fd-7826a9dd9e41.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PrimeIntellect_INTELLECT-1-Instruct/1762652579.817848",
- "retrieved_timestamp": "1762652579.8178492",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PrimeIntellect/INTELLECT-1-Instruct",
- "developer": "PrimeIntellect",
- "inference_platform": "unknown",
- "id": "PrimeIntellect/INTELLECT-1-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28698007801214714
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.022658610271903322
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2483221476510067
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3576875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10638297872340426
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.211
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/PrimeIntellect/PrimeIntellect_INTELLECT-1/bfffc240-22ab-4cc0-97c8-466ddf472ac4.json b/leaderboard_data/HFOpenLLMv2/PrimeIntellect/PrimeIntellect_INTELLECT-1/bfffc240-22ab-4cc0-97c8-466ddf472ac4.json
deleted file mode 100644
index 5bd185ec440f5514a0506ba65bacd5dc690a8c6f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/PrimeIntellect/PrimeIntellect_INTELLECT-1/bfffc240-22ab-4cc0-97c8-466ddf472ac4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PrimeIntellect_INTELLECT-1/1762652579.8176599",
- "retrieved_timestamp": "1762652579.817661",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PrimeIntellect/INTELLECT-1",
- "developer": "PrimeIntellect",
- "inference_platform": "unknown",
- "id": "PrimeIntellect/INTELLECT-1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1757315035217667
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27398007801214713
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3752708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11203457446808511
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.211
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/PrimeIntellect/PrimeIntellect_INTELLECT-1/fee7966f-3e1b-43d9-b129-b0c23aac53b5.json b/leaderboard_data/HFOpenLLMv2/PrimeIntellect/PrimeIntellect_INTELLECT-1/fee7966f-3e1b-43d9-b129-b0c23aac53b5.json
deleted file mode 100644
index 8105f4c3a8dcc5b68186d69d85f417276429f1e3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/PrimeIntellect/PrimeIntellect_INTELLECT-1/fee7966f-3e1b-43d9-b129-b0c23aac53b5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PrimeIntellect_INTELLECT-1/1762652579.817406",
- "retrieved_timestamp": "1762652579.817406",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PrimeIntellect/INTELLECT-1",
- "developer": "PrimeIntellect",
- "inference_platform": "unknown",
- "id": "PrimeIntellect/INTELLECT-1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1757315035217667
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27598007801214713
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3339375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11228390957446809
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.211
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/PuxAI/PuxAI_LUA_model/05dc0500-be97-456f-9d12-12192626ea39.json b/leaderboard_data/HFOpenLLMv2/PuxAI/PuxAI_LUA_model/05dc0500-be97-456f-9d12-12192626ea39.json
deleted file mode 100644
index 1b0deb710b2b0b1eb3cf432c296e98930f3bbdbd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/PuxAI/PuxAI_LUA_model/05dc0500-be97-456f-9d12-12192626ea39.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PuxAI_LUA_model/1762652579.818059",
- "retrieved_timestamp": "1762652579.818059",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PuxAI/LUA_model",
- "developer": "PuxAI",
- "inference_platform": "unknown",
- "id": "PuxAI/LUA_model"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22821336276634885
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2876778102988436
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34838541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11228390957446809
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.386
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/PygmalionAI/PygmalionAI_pygmalion-6b/7cdfef58-c871-4158-b97d-ed843f7d667b.json b/leaderboard_data/HFOpenLLMv2/PygmalionAI/PygmalionAI_pygmalion-6b/7cdfef58-c871-4158-b97d-ed843f7d667b.json
deleted file mode 100644
index 5f1d57295d2f3fa91f70a24bf99a8ec062607e4f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/PygmalionAI/PygmalionAI_pygmalion-6b/7cdfef58-c871-4158-b97d-ed843f7d667b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PygmalionAI_pygmalion-6b/1762652579.818316",
- "retrieved_timestamp": "1762652579.8183172",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PygmalionAI/pygmalion-6b",
- "developer": "PygmalionAI",
- "inference_platform": "unknown",
- "id": "PygmalionAI/pygmalion-6b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20910406610016974
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31988944643860034
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.008308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24916107382550334
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3683541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11835106382978723
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GPTJForCausalLM",
- "params_billions": 6.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Q-bert/Q-bert_MetaMath-1B/713b1c64-9637-4d83-aee9-f81988fec0b5.json b/leaderboard_data/HFOpenLLMv2/Q-bert/Q-bert_MetaMath-1B/713b1c64-9637-4d83-aee9-f81988fec0b5.json
deleted file mode 100644
index 730a14cc1f3214be4422af0ddb80fbfe1fb63980..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Q-bert/Q-bert_MetaMath-1B/713b1c64-9637-4d83-aee9-f81988fec0b5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Q-bert_MetaMath-1B/1762652579.8185658",
- "retrieved_timestamp": "1762652579.8185658",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Q-bert/MetaMath-1B",
- "developer": "Q-bert",
- "inference_platform": "unknown",
- "id": "Q-bert/MetaMath-1B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5300391849182392
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34506863677929517
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06268882175226587
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2516778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3289166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1495179521276596
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.236
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_1up-14b/c315527d-ea14-42a8-a002-4bb67c085fc0.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_1up-14b/c315527d-ea14-42a8-a002-4bb67c085fc0.json
deleted file mode 100644
index c1f8f18531c103ce6589cc5820c1275b1532c0d0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_1up-14b/c315527d-ea14-42a8-a002-4bb67c085fc0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_1up-14b/1762652579.818811",
- "retrieved_timestamp": "1762652579.818812",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/1up-14b",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/1up-14b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6888079185450161
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6920935635451656
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4161631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3624161073825503
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4583333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5406416223404256
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Adamant-14B-sce/7ed9dcc6-7915-4a7e-a190-07e067d2fd79.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Adamant-14B-sce/7ed9dcc6-7915-4a7e-a190-07e067d2fd79.json
deleted file mode 100644
index 4d0f05d41f6bf0b4a69d5b5b2abbfa682da1d36f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Adamant-14B-sce/7ed9dcc6-7915-4a7e-a190-07e067d2fd79.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Adamant-14B-sce/1762652579.819103",
- "retrieved_timestamp": "1762652579.819104",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Adamant-14B-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Adamant-14B-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6857604489421402
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6858943778247303
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3987915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35067114093959734
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45579166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5371509308510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Alice-14B/3dd99496-1274-439f-b7c2-1fd731745753.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Alice-14B/3dd99496-1274-439f-b7c2-1fd731745753.json
deleted file mode 100644
index 5bedfd990cfaeeaa579164a129ffd31186732f3e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Alice-14B/3dd99496-1274-439f-b7c2-1fd731745753.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Alice-14B/1762652579.819317",
- "retrieved_timestamp": "1762652579.819317",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Alice-14B",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Alice-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6836371937570092
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6937748567349198
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4569486404833837
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35151006711409394
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44794791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5418882978723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Alien-CoT-14B-sce/dc89616f-c86d-41d0-9945-12703dc8f905.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Alien-CoT-14B-sce/dc89616f-c86d-41d0-9945-12703dc8f905.json
deleted file mode 100644
index 6143cdc30052ac2ea82b7b6b7b9b7315bf10eb30..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Alien-CoT-14B-sce/dc89616f-c86d-41d0-9945-12703dc8f905.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Alien-CoT-14B-sce/1762652579.819517",
- "retrieved_timestamp": "1762652579.8195179",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Alien-CoT-14B-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Alien-CoT-14B-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07486358417886763
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6395487523790632
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.520392749244713
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39177852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47852083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5170378989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Aura-8B-Linear/2d22ab53-547d-41bb-8700-12bc5b16c97d.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Aura-8B-Linear/2d22ab53-547d-41bb-8700-12bc5b16c97d.json
deleted file mode 100644
index 807d541bc0f68c3133ae63abecfa0b70a71be730..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Aura-8B-Linear/2d22ab53-547d-41bb-8700-12bc5b16c97d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Aura-8B-Linear/1762652579.819725",
- "retrieved_timestamp": "1762652579.819726",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Aura-8B-Linear",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Aura-8B-Linear"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.794770098893159
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5074298101934884
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18051359516616314
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3686979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3800698138297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Casa-14b-sce/09bbb732-62d8-4cec-972a-273b728df1f4.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Casa-14b-sce/09bbb732-62d8-4cec-972a-273b728df1f4.json
deleted file mode 100644
index 026e7f57ab07aa7c8bc07c5a229a9f0d1aa5ef6c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Casa-14b-sce/09bbb732-62d8-4cec-972a-273b728df1f4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Casa-14b-sce/1762652579.8199282",
- "retrieved_timestamp": "1762652579.8199282",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Casa-14b-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Casa-14b-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6653523761397536
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6901033460664828
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4697885196374622
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33305369127516776
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43102083333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5425531914893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Casa-14b-sce/a0dde1eb-a763-4568-8122-1b280dedb2ce.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Casa-14b-sce/a0dde1eb-a763-4568-8122-1b280dedb2ce.json
deleted file mode 100644
index 2ff50b9e9687edc77f883072407490c5e451cf16..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Casa-14b-sce/a0dde1eb-a763-4568-8122-1b280dedb2ce.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Casa-14b-sce/1762652579.820149",
- "retrieved_timestamp": "1762652579.820149",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Casa-14b-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Casa-14b-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6718218770639681
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6891400252742456
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4984894259818731
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3338926174496644
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4322916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5408078457446809
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Charlie-8B-Linear/c56d7463-dad2-4c9c-8823-a4b6faa5aeb9.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Charlie-8B-Linear/c56d7463-dad2-4c9c-8823-a4b6faa5aeb9.json
deleted file mode 100644
index f509976d419b2932de870b8683a3efbeaaf6b711..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Charlie-8B-Linear/c56d7463-dad2-4c9c-8823-a4b6faa5aeb9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Charlie-8B-Linear/1762652579.820338",
- "retrieved_timestamp": "1762652579.820339",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Charlie-8B-Linear",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Charlie-8B-Linear"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7380672172059026
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5141359215016831
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26510574018126887
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3485416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3572972074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Chromatic-8b-sce/f626897d-5003-40fa-8020-c100748a847f.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Chromatic-8b-sce/f626897d-5003-40fa-8020-c100748a847f.json
deleted file mode 100644
index ea86c76f8e8dba0100a182117a3814a700a41984..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Chromatic-8b-sce/f626897d-5003-40fa-8020-c100748a847f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Chromatic-8b-sce/1762652579.8205519",
- "retrieved_timestamp": "1762652579.820553",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Chromatic-8b-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Chromatic-8b-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5085074269604649
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5063171816307924
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1555891238670695
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.405125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37549867021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Dyson-14b/35c401bd-ed12-475e-afbc-e664243d90d5.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Dyson-14b/35c401bd-ed12-475e-afbc-e664243d90d5.json
deleted file mode 100644
index 4a7a55aa4952a4f410c4014a6b38fda3132d2041..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Dyson-14b/35c401bd-ed12-475e-afbc-e664243d90d5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Dyson-14b/1762652579.821013",
- "retrieved_timestamp": "1762652579.821014",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Dyson-14b",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Dyson-14b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5856682491345186
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6862902828866305
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5392749244712991
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4259375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5398936170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Edu-14B-Linear/a70e7642-3cc7-4719-bc22-68182baa3857.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Edu-14B-Linear/a70e7642-3cc7-4719-bc22-68182baa3857.json
deleted file mode 100644
index 60152df1e31d07cf793cd25439653335a7a741a3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Edu-14B-Linear/a70e7642-3cc7-4719-bc22-68182baa3857.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Edu-14B-Linear/1762652579.821216",
- "retrieved_timestamp": "1762652579.821216",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Edu-14B-Linear",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Edu-14B-Linear"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6158182511292261
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6757820996225599
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24471299093655588
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31711409395973156
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43775000000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.508560505319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Fugazi14b/ee38e1c3-7a6b-4357-94ac-b309da33d14b.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Fugazi14b/ee38e1c3-7a6b-4357-94ac-b309da33d14b.json
deleted file mode 100644
index 8bb386dd3ceeed7a128a26fca4c9e540b745d9d8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Fugazi14b/ee38e1c3-7a6b-4357-94ac-b309da33d14b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Fugazi14b/1762652579.8215911",
- "retrieved_timestamp": "1762652579.821592",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Fugazi14b",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Fugazi14b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6997987561891337
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6941017680723065
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4652567975830816
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35151006711409394
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45455208333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5417220744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_GZA-14B-sce/cfb61ec3-ab7e-4697-892e-a8dd62518f39.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_GZA-14B-sce/cfb61ec3-ab7e-4697-892e-a8dd62518f39.json
deleted file mode 100644
index d2dd0e58fb9b8648f87073b477b0582cfa4948d6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_GZA-14B-sce/cfb61ec3-ab7e-4697-892e-a8dd62518f39.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_GZA-14B-sce/1762652579.821823",
- "retrieved_timestamp": "1762652579.821824",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/GZA-14B-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/GZA-14B-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6274086091570367
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6686539892126272
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47205438066465255
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4284791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.523188164893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Geedorah-14B/c4a79914-b049-436b-9de6-640cc3e119ee.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Geedorah-14B/c4a79914-b049-436b-9de6-640cc3e119ee.json
deleted file mode 100644
index 449538b4d8a8486debf963cfcee7508227e797af..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Geedorah-14B/c4a79914-b049-436b-9de6-640cc3e119ee.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Geedorah-14B/1762652579.822031",
- "retrieved_timestamp": "1762652579.822032",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Geedorah-14B",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Geedorah-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6872841837435781
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6964189914061528
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44486404833836857
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34731543624161076
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45467708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5421376329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_GivingTree-8b-sce/9b753075-a150-4bc3-9425-2371010daf8b.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_GivingTree-8b-sce/9b753075-a150-4bc3-9425-2371010daf8b.json
deleted file mode 100644
index 4abeac26dd248fa890aa4bdd47bf5332b947c034..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_GivingTree-8b-sce/9b753075-a150-4bc3-9425-2371010daf8b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_GivingTree-8b-sce/1762652579.8222332",
- "retrieved_timestamp": "1762652579.8222342",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/GivingTree-8b-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/GivingTree-8b-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5006139266036339
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5040482025572203
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15256797583081572
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3221476510067114
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.405125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37608045212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_GuiltySpark-14B-ties/2b50b73e-9734-4502-b088-8d4936291aaa.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_GuiltySpark-14B-ties/2b50b73e-9734-4502-b088-8d4936291aaa.json
deleted file mode 100644
index 7e6adf266d596baaf816a1e8f3e19c2ab5789ba7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_GuiltySpark-14B-ties/2b50b73e-9734-4502-b088-8d4936291aaa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_GuiltySpark-14B-ties/1762652579.822431",
- "retrieved_timestamp": "1762652579.822432",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/GuiltySpark-14B-ties",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/GuiltySpark-14B-ties"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6854357549080883
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6914302574038697
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38368580060422963
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3649328859060403
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4557291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5399767287234043
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Halo-14B-sce/156424f1-2a1e-4e61-b081-bb066ee3958d.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Halo-14B-sce/156424f1-2a1e-4e61-b081-bb066ee3958d.json
deleted file mode 100644
index 709ddae210940ae4d89ac4b383d094eb0bfcd424..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Halo-14B-sce/156424f1-2a1e-4e61-b081-bb066ee3958d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Halo-14B-sce/1762652579.822633",
- "retrieved_timestamp": "1762652579.822633",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Halo-14B-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Halo-14B-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6753691316817156
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6875692490185378
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42900302114803623
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34731543624161076
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44007291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5376496010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Heretic1.5b/e3d7453d-0ba6-4980-be81-827122149bb6.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Heretic1.5b/e3d7453d-0ba6-4980-be81-827122149bb6.json
deleted file mode 100644
index dbf0fe0b35dbb2cde1da02b6a1d84cb34b8cbdaf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Heretic1.5b/e3d7453d-0ba6-4980-be81-827122149bb6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Heretic1.5b/1762652579.8228369",
- "retrieved_timestamp": "1762652579.8228369",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Heretic1.5b",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Heretic1.5b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20615633186611523
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3529180801121154
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24395770392749244
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3511458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17278922872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.73
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Hyde-14b-sce/814ce716-6f61-4980-a8f6-7918c7b0eea5.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Hyde-14b-sce/814ce716-6f61-4980-a8f6-7918c7b0eea5.json
deleted file mode 100644
index ecb6e8528e417a202526be4bd1d70648ca23ea07..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Hyde-14b-sce/814ce716-6f61-4980-a8f6-7918c7b0eea5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Hyde-14b-sce/1762652579.823039",
- "retrieved_timestamp": "1762652579.823039",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Hyde-14b-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Hyde-14b-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6715470507143269
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6885164810743584
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27341389728096677
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3414429530201342
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41409375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5300033244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Imagine-v0.5-16bit/ccb33ad4-98f5-4980-a442-1a1772fab792.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Imagine-v0.5-16bit/ccb33ad4-98f5-4980-a442-1a1772fab792.json
deleted file mode 100644
index c29cc7b9a3833456bb92c73e615583263c517b0b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Imagine-v0.5-16bit/ccb33ad4-98f5-4980-a442-1a1772fab792.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Imagine-v0.5-16bit/1762652579.823242",
- "retrieved_timestamp": "1762652579.823243",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Imagine-v0.5-16bit",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Imagine-v0.5-16bit"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2758990589413866
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6769135492947932
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13972809667673716
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3649328859060403
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43492708333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.535405585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Imbue-14b/c50c07fc-b529-43c9-9f3d-0f1ff174b905.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Imbue-14b/c50c07fc-b529-43c9-9f3d-0f1ff174b905.json
deleted file mode 100644
index 751705fffcacb5dc0aa751c99d07d3a4f180345f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Imbue-14b/c50c07fc-b529-43c9-9f3d-0f1ff174b905.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Imbue-14b/1762652579.8234398",
- "retrieved_timestamp": "1762652579.8234408",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Imbue-14b",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Imbue-14b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5199725616918665
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6845292092854045
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5317220543806647
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41672916666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5402260638297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Insom/51f419c6-1107-41c9-896b-fadbbde4f5e9.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Insom/51f419c6-1107-41c9-896b-fadbbde4f5e9.json
deleted file mode 100644
index b688722143ca5ad906cdc3d6c2f907c8445a8f31..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Insom/51f419c6-1107-41c9-896b-fadbbde4f5e9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Insom/1762652579.823634",
- "retrieved_timestamp": "1762652579.8236349",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Insom",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Insom"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.68183863260593
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6881456689046391
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3851963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3498322147651007
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43114583333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5352393617021277
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_InspectorDeck-14B-sce/1ac547e3-1b29-462a-aa08-1e9ef9e3f409.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_InspectorDeck-14B-sce/1ac547e3-1b29-462a-aa08-1e9ef9e3f409.json
deleted file mode 100644
index a04e078539d528fc5096baff79d773e84071ed8f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_InspectorDeck-14B-sce/1ac547e3-1b29-462a-aa08-1e9ef9e3f409.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_InspectorDeck-14B-sce/1762652579.8238342",
- "retrieved_timestamp": "1762652579.8238342",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/InspectorDeck-14B-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/InspectorDeck-14B-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32408454013129606
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6668480318764974
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3164652567975831
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39815625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5260970744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Jekyl-8b-sce/dc6a9e35-c130-4edc-93bc-5f0b6ac0e05d.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Jekyl-8b-sce/dc6a9e35-c130-4edc-93bc-5f0b6ac0e05d.json
deleted file mode 100644
index 87a93a91778824509d26cadee6f3b3db28434ede..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Jekyl-8b-sce/dc6a9e35-c130-4edc-93bc-5f0b6ac0e05d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Jekyl-8b-sce/1762652579.82404",
- "retrieved_timestamp": "1762652579.824041",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Jekyl-8b-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Jekyl-8b-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46968931324441365
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4993588236391566
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16163141993957703
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41966666666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3686003989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Jigsaw-14B-Linear/7533defe-b19d-4571-a403-c443ec03a31b.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Jigsaw-14B-Linear/7533defe-b19d-4571-a403-c443ec03a31b.json
deleted file mode 100644
index 9a646a47b9407ddaa1643bfc72f4e9ab5f088a9e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Jigsaw-14B-Linear/7533defe-b19d-4571-a403-c443ec03a31b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Jigsaw-14B-Linear/1762652579.824291",
- "retrieved_timestamp": "1762652579.824291",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Jigsaw-14B-Linear",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Jigsaw-14B-Linear"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6480416406246536
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6864625931836906
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26510574018126887
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34060402684563756
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44826041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5233543882978723
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Katana-8b-sce/dea8c833-7deb-43f8-9b15-acbadf4fc749.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Katana-8b-sce/dea8c833-7deb-43f8-9b15-acbadf4fc749.json
deleted file mode 100644
index c4a541ffc947e70cdfcf43c73290678eb0f7c8f8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Katana-8b-sce/dea8c833-7deb-43f8-9b15-acbadf4fc749.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Katana-8b-sce/1762652579.8246028",
- "retrieved_timestamp": "1762652579.8246038",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Katana-8b-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Katana-8b-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5107304175144174
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5074684221457483
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1510574018126888
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4037604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3770777925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Knot-CoT-14B-sce/fe0b75bf-2035-4ffe-8cbf-d5f4c66907aa.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Knot-CoT-14B-sce/fe0b75bf-2035-4ffe-8cbf-d5f4c66907aa.json
deleted file mode 100644
index 17bf6a2c10020469573030e5da45bfa503802861..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Knot-CoT-14B-sce/fe0b75bf-2035-4ffe-8cbf-d5f4c66907aa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Knot-CoT-14B-sce/1762652579.8248682",
- "retrieved_timestamp": "1762652579.8248692",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Knot-CoT-14B-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Knot-CoT-14B-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4831779677921249
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6615610657544672
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3995468277945619
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41403125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.515375664893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Lineage-14B/37f890b7-5487-46ea-b61e-d91b5349d078.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Lineage-14B/37f890b7-5487-46ea-b61e-d91b5349d078.json
deleted file mode 100644
index aaa0f4b34fe7ad7b96e6ad90e57426fca254dd8f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Lineage-14B/37f890b7-5487-46ea-b61e-d91b5349d078.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Lineage-14B/1762652579.82509",
- "retrieved_timestamp": "1762652579.8250911",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Lineage-14B",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Lineage-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7070428684778609
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6933789516730196
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4244712990936556
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3598993288590604
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4597291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5410571808510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Loke-14B-sce/cfac443e-5c66-45e3-bf7a-7c596d01d4ff.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Loke-14B-sce/cfac443e-5c66-45e3-bf7a-7c596d01d4ff.json
deleted file mode 100644
index 503da1ce691cdfcf5417ea36f67d1f29a8a25aa1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Loke-14B-sce/cfac443e-5c66-45e3-bf7a-7c596d01d4ff.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Loke-14B-sce/1762652579.825529",
- "retrieved_timestamp": "1762652579.82553",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Loke-14B-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Loke-14B-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6847863668399845
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6923902176707362
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3904833836858006
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3649328859060403
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46366666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5401429521276596
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_MFDOOM-14B/3efa12a5-4525-4ee9-80bd-99c4b8d2ccb2.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_MFDOOM-14B/3efa12a5-4525-4ee9-80bd-99c4b8d2ccb2.json
deleted file mode 100644
index 901ed05e8129fe82847dc2b1f2967071515ffccf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_MFDOOM-14B/3efa12a5-4525-4ee9-80bd-99c4b8d2ccb2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_MFDOOM-14B/1762652579.825741",
- "retrieved_timestamp": "1762652579.825742",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/MFDOOM-14B",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/MFDOOM-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6736204382150472
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6916400252742457
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5264350453172205
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32298657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43765625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5425531914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_MFGRIMM-14B/773228d8-7e03-4ba8-87c1-f59ac5aad425.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_MFGRIMM-14B/773228d8-7e03-4ba8-87c1-f59ac5aad425.json
deleted file mode 100644
index f7e9cc965ae8af6fc5fec7b0533a4b1c34af9ba1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_MFGRIMM-14B/773228d8-7e03-4ba8-87c1-f59ac5aad425.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_MFGRIMM-14B/1762652579.8259468",
- "retrieved_timestamp": "1762652579.825948",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/MFGRIMM-14B",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/MFGRIMM-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6894074389287091
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.69087746819662
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5060422960725075
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3338926174496644
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43613541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5416389627659575
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Mithril-14B-sce/8ab4e441-2efb-4510-87ea-43f3fbcc67ac.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Mithril-14B-sce/8ab4e441-2efb-4510-87ea-43f3fbcc67ac.json
deleted file mode 100644
index 157f65994d4a16f9e358e676e51d352f8a382518..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Mithril-14B-sce/8ab4e441-2efb-4510-87ea-43f3fbcc67ac.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Mithril-14B-sce/1762652579.826359",
- "retrieved_timestamp": "1762652579.82636",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Mithril-14B-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Mithril-14B-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6957772044841022
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6925969240705362
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3821752265861027
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4610625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5403091755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Mononoke-14B-sce/6f2d122b-f7fe-448a-ac8b-864314e94692.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Mononoke-14B-sce/6f2d122b-f7fe-448a-ac8b-864314e94692.json
deleted file mode 100644
index 5f2beee581615a9fd7c7dc5f6921084aa996d2f1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Mononoke-14B-sce/6f2d122b-f7fe-448a-ac8b-864314e94692.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Mononoke-14B-sce/1762652579.8265631",
- "retrieved_timestamp": "1762652579.826564",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Mononoke-14B-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Mononoke-14B-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3502129904209719
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6744431226588331
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4697885196374622
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32298657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4154583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5297539893617021
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Motion-8B-Linear/db82138b-f915-4451-aa85-8bc4c7fdd225.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Motion-8B-Linear/db82138b-f915-4451-aa85-8bc4c7fdd225.json
deleted file mode 100644
index f5ed2ea441b7e07fbbd1774310e2e165753158ee..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Motion-8B-Linear/db82138b-f915-4451-aa85-8bc4c7fdd225.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Motion-8B-Linear/1762652579.826771",
- "retrieved_timestamp": "1762652579.826771",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Motion-8B-Linear",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Motion-8B-Linear"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7685917809190725
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5084252652465131
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18882175226586104
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36060416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3784906914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Mouse-9B/70e3145f-d67b-403d-af2a-1b06b2ba0f24.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Mouse-9B/70e3145f-d67b-403d-af2a-1b06b2ba0f24.json
deleted file mode 100644
index 7040b36539124cbf3887936d4c6c8060141ea6f5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Mouse-9B/70e3145f-d67b-403d-af2a-1b06b2ba0f24.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Mouse-9B/1762652579.826978",
- "retrieved_timestamp": "1762652579.826978",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Mouse-9B",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Mouse-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1324917884546337
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29789470527601253
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.005287009063444109
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25419463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3469583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11386303191489362
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 9.207
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Nova-14b-sce/3336c8fa-fcef-4513-946d-9254f537e418.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Nova-14b-sce/3336c8fa-fcef-4513-946d-9254f537e418.json
deleted file mode 100644
index 70994c6afc6c49025b8c2d68ca3f7810309ef423..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Nova-14b-sce/3336c8fa-fcef-4513-946d-9254f537e418.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Nova-14b-sce/1762652579.827177",
- "retrieved_timestamp": "1762652579.827178",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Nova-14b-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Nova-14b-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7021968377239058
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6935261478148286
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4161631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36325503355704697
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4570625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5413065159574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_NovaScotia-14b-stock/8ab3ce59-d0cd-4764-98c7-c4df81bc3c23.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_NovaScotia-14b-stock/8ab3ce59-d0cd-4764-98c7-c4df81bc3c23.json
deleted file mode 100644
index 8cf9a071a7b262416d716149dc3a80476e2eb4d6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_NovaScotia-14b-stock/8ab3ce59-d0cd-4764-98c7-c4df81bc3c23.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_NovaScotia-14b-stock/1762652579.827381",
- "retrieved_timestamp": "1762652579.827381",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/NovaScotia-14b-stock",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/NovaScotia-14b-stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6787412953186434
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6935261478148286
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46299093655589124
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.348993288590604
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44934375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5408909574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_ODB-14B-sce/66743ed1-93ab-41f7-9002-0080e7f74722.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_ODB-14B-sce/66743ed1-93ab-41f7-9002-0080e7f74722.json
deleted file mode 100644
index f6982052b06ee0aa46b7804bd07e41de3a59e346..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_ODB-14B-sce/66743ed1-93ab-41f7-9002-0080e7f74722.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_ODB-14b-sce/1762652579.827807",
- "retrieved_timestamp": "1762652579.827808",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/ODB-14b-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/ODB-14b-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7015973173402128
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6941928144814953
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.411631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3624161073825503
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4570625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5411402925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_ODB-14B-sce/79d7d2a1-dcb6-40a7-b29c-7213ebd261df.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_ODB-14B-sce/79d7d2a1-dcb6-40a7-b29c-7213ebd261df.json
deleted file mode 100644
index c5d36571c9a75a80aefa526847fca0dc8aacefaf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_ODB-14B-sce/79d7d2a1-dcb6-40a7-b29c-7213ebd261df.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_ODB-14B-sce/1762652579.827594",
- "retrieved_timestamp": "1762652579.827595",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/ODB-14B-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/ODB-14B-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.292235712354331
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6558922017209644
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2545317220543807
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39288541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5206948138297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Unknown",
- "params_billions": 0.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Oasis-14B-ties/a3ef4bc2-c560-4a62-8227-2bd30120b537.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Oasis-14B-ties/a3ef4bc2-c560-4a62-8227-2bd30120b537.json
deleted file mode 100644
index 9a3a7a793c44b132d348b26dc375606125a02e73..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Oasis-14B-ties/a3ef4bc2-c560-4a62-8227-2bd30120b537.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Oasis-14B-ties/1762652579.827992",
- "retrieved_timestamp": "1762652579.8279932",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Oasis-14B-ties",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Oasis-14B-ties"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6936539492989712
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6914976731342066
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37537764350453173
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3649328859060403
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4570625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5404753989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Origami-14B-sce/82826944-e4a1-47bd-b240-c70e21acfc51.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Origami-14B-sce/82826944-e4a1-47bd-b240-c70e21acfc51.json
deleted file mode 100644
index 1c209a31b1da2cc504934cf58e4017e716e4940c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Origami-14B-sce/82826944-e4a1-47bd-b240-c70e21acfc51.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Origami-14B-sce/1762652579.828193",
- "retrieved_timestamp": "1762652579.8281941",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Origami-14B-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Origami-14B-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3259329689667859
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6620277470720752
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29154078549848944
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40348958333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5244348404255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Ponder-14B-linear/30942374-a112-4035-a4f2-e30bff57f9ce.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Ponder-14B-linear/30942374-a112-4035-a4f2-e30bff57f9ce.json
deleted file mode 100644
index a76444f99aca596527dc0e8156d61b911fe6316f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Ponder-14B-linear/30942374-a112-4035-a4f2-e30bff57f9ce.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Ponder-14B-linear/1762652579.8290088",
- "retrieved_timestamp": "1762652579.8290088",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Ponder-14B-linear",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Ponder-14B-linear"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6906064796960952
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6942602302118323
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4282477341389728
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35822147651006714
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45576041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5408078457446809
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_RZA-14B-sce/e8a8cf1f-5bcf-45ae-b590-fb04de06b77f.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_RZA-14B-sce/e8a8cf1f-5bcf-45ae-b590-fb04de06b77f.json
deleted file mode 100644
index 962a40719ddff9d765791a4eee5ca8718a76f9b7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_RZA-14B-sce/e8a8cf1f-5bcf-45ae-b590-fb04de06b77f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_RZA-14B-sce/1762652579.829216",
- "retrieved_timestamp": "1762652579.829216",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/RZA-14B-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/RZA-14B-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4773578549360142
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6685829139021245
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5188821752265861
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41133333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.538314494680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Rosemary-14b/84018db9-2b85-4b6f-beff-b4930b230399.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Rosemary-14b/84018db9-2b85-4b6f-beff-b4930b230399.json
deleted file mode 100644
index f438e496fec9a7a6779d34038db8d6c676b564ba..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Rosemary-14b/84018db9-2b85-4b6f-beff-b4930b230399.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Rosemary-14b/1762652579.829469",
- "retrieved_timestamp": "1762652579.82947",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Rosemary-14b",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Rosemary-14b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6915306941138402
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6955261478148286
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.438821752265861
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3565436241610738
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44921875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5396442819148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Rune-14b/3ed52eaf-6b73-46ab-8ae7-3afe120fe437.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Rune-14b/3ed52eaf-6b73-46ab-8ae7-3afe120fe437.json
deleted file mode 100644
index 4817ea5e2142106888a9acabc977a2e2d6def515..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Rune-14b/3ed52eaf-6b73-46ab-8ae7-3afe120fe437.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Rune-14b/1762652579.829681",
- "retrieved_timestamp": "1762652579.8296819",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Rune-14b",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Rune-14b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7015973173402128
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6937489642141156
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45845921450151056
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35151006711409394
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45328125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5411402925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_SZA-14B-sce/6d983237-925e-4197-a592-17cca9219bda.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_SZA-14B-sce/6d983237-925e-4197-a592-17cca9219bda.json
deleted file mode 100644
index bf5fc724580c6935f3d9cb7a04be35841b38ee3a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_SZA-14B-sce/6d983237-925e-4197-a592-17cca9219bda.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_SZA-14B-sce/1762652579.829889",
- "retrieved_timestamp": "1762652579.82989",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/SZA-14B-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/SZA-14B-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5659095644002359
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6888749072998727
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5241691842900302
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.433875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5353224734042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Sake-20b/25a672ed-3e0e-416f-abf4-a935e63171c6.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Sake-20b/25a672ed-3e0e-416f-abf4-a935e63171c6.json
deleted file mode 100644
index 12ec012b94fdd6bf6faeb7c82fe0e755120519db..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Sake-20b/25a672ed-3e0e-416f-abf4-a935e63171c6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Sake-20b/1762652579.830092",
- "retrieved_timestamp": "1762652579.8300931",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Sake-20b",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Sake-20b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6692741924759638
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6769823539837527
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4652567975830816
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3187919463087248
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44940625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5391456117021277
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 21.475
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Spok-14b-sce/9f15293c-5668-4895-b4d0-4062cac344e7.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Spok-14b-sce/9f15293c-5668-4895-b4d0-4062cac344e7.json
deleted file mode 100644
index 5fdb8fc043440d99dfc9c32d3a15b951155b97a6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Spok-14b-sce/9f15293c-5668-4895-b4d0-4062cac344e7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Spok-14b-sce/1762652579.830291",
- "retrieved_timestamp": "1762652579.830292",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Spok-14b-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Spok-14b-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6681748870773991
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6899172301380289
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2719033232628399
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34563758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41409375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5297539893617021
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Sumatra-20b/ae69fb3f-19a1-4b00-9309-8685e107aeba.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Sumatra-20b/ae69fb3f-19a1-4b00-9309-8685e107aeba.json
deleted file mode 100644
index 51311249502ec822e7454d00b979ad0297bcc7b3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Sumatra-20b/ae69fb3f-19a1-4b00-9309-8685e107aeba.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Sumatra-20b/1762652579.830487",
- "retrieved_timestamp": "1762652579.830488",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Sumatra-20b",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Sumatra-20b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.673795529195867
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6855416597047258
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36706948640483383
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3263422818791946
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4560104166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5414727393617021
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 21.475
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_SuperNova14b/b0659361-fb53-40db-81a7-2a72771bbd1a.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_SuperNova14b/b0659361-fb53-40db-81a7-2a72771bbd1a.json
deleted file mode 100644
index e6ceb3c3bae9396c6dfc64b55f862914b547d123..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_SuperNova14b/b0659361-fb53-40db-81a7-2a72771bbd1a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_SuperNova14b/1762652579.830682",
- "retrieved_timestamp": "1762652579.830683",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/SuperNova14b",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/SuperNova14b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.707642388861554
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6937489642141156
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4395770392749245
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3523489932885906
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4545208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.543467420212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_TB0-8B-sce/8f0da98a-cf9f-4cbb-8d4a-8c12d737580c.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_TB0-8B-sce/8f0da98a-cf9f-4cbb-8d4a-8c12d737580c.json
deleted file mode 100644
index ac3d8cee1987b81c8adedab4f468624473c05d0d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_TB0-8B-sce/8f0da98a-cf9f-4cbb-8d4a-8c12d737580c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_TB0-8B-sce/1762652579.8308768",
- "retrieved_timestamp": "1762652579.8308768",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/TB0-8B-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/TB0-8B-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5107304175144174
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5074684221457483
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1510574018126888
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4037604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3770777925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_TBL-8B-sce/4bff88c0-89fb-4d07-a83d-251c7aaeace4.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_TBL-8B-sce/4bff88c0-89fb-4d07-a83d-251c7aaeace4.json
deleted file mode 100644
index 5ed9a174b5c48801f671a4c0b7e9906a2bf585b6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_TBL-8B-sce/4bff88c0-89fb-4d07-a83d-251c7aaeace4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_TBL-8B-sce/1762652579.831074",
- "retrieved_timestamp": "1762652579.831075",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/TBL-8B-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/TBL-8B-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45809895521660304
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5008187839060233
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15332326283987915
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3338926174496644
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42363541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3689328457446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Venti-20b/2b97259b-d7a5-4934-b350-7b1322964899.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Venti-20b/2b97259b-d7a5-4934-b350-7b1322964899.json
deleted file mode 100644
index 0029072e0567b9cff9caccdc8267b43c8a9ac52a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Venti-20b/2b97259b-d7a5-4934-b350-7b1322964899.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Venti-20b/1762652579.8314738",
- "retrieved_timestamp": "1762652579.831475",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Venti-20b",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Venti-20b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6641034676879568
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6901240010129452
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3391238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33221476510067116
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44797916666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5386469414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 21.475
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Venti-Blend-sce/e9fa96ff-d790-4948-9071-dd1376701fc1.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Venti-Blend-sce/e9fa96ff-d790-4948-9071-dd1376701fc1.json
deleted file mode 100644
index 9130e62f42066233160ae42e18e0784aba7c16b5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Venti-Blend-sce/e9fa96ff-d790-4948-9071-dd1376701fc1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Venti-Blend-sce/1762652579.831816",
- "retrieved_timestamp": "1762652579.8318179",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Venti-Blend-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Venti-Blend-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6879335718116819
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6842921511560114
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40558912386706947
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43892708333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5413896276595744
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 21.475
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Vine-14b-sce/70d25d8c-96e9-45e4-b0d1-684a89278064.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Vine-14b-sce/70d25d8c-96e9-45e4-b0d1-684a89278064.json
deleted file mode 100644
index 05a5f424cf8368d042a81cf607e8e295eb8be151..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Vine-14b-sce/70d25d8c-96e9-45e4-b0d1-684a89278064.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Vine-14b-sce/1762652579.8321972",
- "retrieved_timestamp": "1762652579.832198",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Vine-14b-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Vine-14b-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.673345611865406
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6891400252742456
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5007552870090635
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3338926174496644
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4322916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5408078457446809
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Wendy-14B/13e6cad7-a063-4530-bec9-e70e4e98ccc0.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Wendy-14B/13e6cad7-a063-4530-bec9-e70e4e98ccc0.json
deleted file mode 100644
index f398b89d867ed4b8df644b97812fc2433934e972..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Wendy-14B/13e6cad7-a063-4530-bec9-e70e4e98ccc0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Wendy-14B/1762652579.832468",
- "retrieved_timestamp": "1762652579.832469",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Wendy-14B",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Wendy-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6772175605172055
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6957587467354328
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48338368580060426
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33221476510067116
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4428020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.543467420212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Wu-14b-sce/35443539-9756-466b-a36f-66adc5f68ddb.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Wu-14b-sce/35443539-9756-466b-a36f-66adc5f68ddb.json
deleted file mode 100644
index d0bdfb6e1044fc6046f7745bab2c46e9e72b3b5c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_Wu-14b-sce/35443539-9756-466b-a36f-66adc5f68ddb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_Wu-14b-sce/1762652579.832721",
- "retrieved_timestamp": "1762652579.832722",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/Wu-14b-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/Wu-14b-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6718218770639681
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6885164810743585
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26132930513595165
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3464765100671141
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41142708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5292553191489362
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_bloom-14b-stock/1a2b4a76-0feb-4404-a1ef-0408c75f2ca7.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_bloom-14b-stock/1a2b4a76-0feb-4404-a1ef-0408c75f2ca7.json
deleted file mode 100644
index 4e3c94ba505091edf2489c0f203a61241fe6709e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_bloom-14b-stock/1a2b4a76-0feb-4404-a1ef-0408c75f2ca7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_bloom-14b-stock/1762652579.8329449",
- "retrieved_timestamp": "1762652579.8329458",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/bloom-14b-stock",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/bloom-14b-stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6575087434673332
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6877869223612597
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4811178247734139
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43095833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5373171542553191
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_caramel-14B/a9d4b6a9-33af-42a3-be29-d3214a171433.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_caramel-14B/a9d4b6a9-33af-42a3-be29-d3214a171433.json
deleted file mode 100644
index 595e209ea28dc23655a34ee65c37628e12190836..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_caramel-14B/a9d4b6a9-33af-42a3-be29-d3214a171433.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_caramel-14B/1762652579.833162",
- "retrieved_timestamp": "1762652579.833163",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/caramel-14B",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/caramel-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6744947849483814
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6918707471458787
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47129909365558914
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3447986577181208
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.445375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5435505319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_mocha-14B/5c04fa63-11be-42d8-8133-4e79e08e42ad.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_mocha-14B/5c04fa63-11be-42d8-8133-4e79e08e42ad.json
deleted file mode 100644
index 07b3aaec1439836861735e8d302f30287e70c3a0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_mocha-14B/5c04fa63-11be-42d8-8133-4e79e08e42ad.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_mocha-14B/1762652579.833622",
- "retrieved_timestamp": "1762652579.833623",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/mocha-14B",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/mocha-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5893152391210876
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6894730595527842
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5264350453172205
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4271770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5383976063829787
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_mosaic-14b-sce/4fd82b3e-4b13-4e21-9253-6492f8b1feaa.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_mosaic-14b-sce/4fd82b3e-4b13-4e21-9253-6492f8b1feaa.json
deleted file mode 100644
index f92e34bff3c376bf1151794f339ac0d63a666a56..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_mosaic-14b-sce/4fd82b3e-4b13-4e21-9253-6492f8b1feaa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_mosaic-14b-sce/1762652579.8338351",
- "retrieved_timestamp": "1762652579.833836",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/mosaic-14b-sce",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/mosaic-14b-sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6875590100932193
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6907089244809823
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4025679758308157
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3624161073825503
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45579166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5396442819148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_tesseract-14b-stock/4311b63a-282b-4c16-8609-a1d4ab93ace9.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_tesseract-14b-stock/4311b63a-282b-4c16-8609-a1d4ab93ace9.json
deleted file mode 100644
index 27e67e8ba2aea9284e5afa387a279db76d629dd4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_tesseract-14b-stock/4311b63a-282b-4c16-8609-a1d4ab93ace9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_tesseract-14b-stock/1762652579.834054",
- "retrieved_timestamp": "1762652579.834055",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/tesseract-14b-stock",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/tesseract-14b-stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5847939024011845
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6880007346047826
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5143504531722054
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3271812080536913
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42323958333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5388962765957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_time-14b-stock/2755da2c-8347-4bbd-80ee-c58e77a26f5e.json b/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_time-14b-stock/2755da2c-8347-4bbd-80ee-c58e77a26f5e.json
deleted file mode 100644
index 21641aeda8c02647e7f50ebba11b844d33102483..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Quazim0t0/Quazim0t0_time-14b-stock/2755da2c-8347-4bbd-80ee-c58e77a26f5e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Quazim0t0_time-14b-stock/1762652579.834393",
- "retrieved_timestamp": "1762652579.8343942",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Quazim0t0/time-14b-stock",
- "developer": "Quazim0t0",
- "inference_platform": "unknown",
- "id": "Quazim0t0/time-14b-stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6699235805440675
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6897025970028126
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5083081570996979
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3347315436241611
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43232291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5418882978723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-0.5B-Chat/96baee1a-7ea7-454f-ac8b-fe1bead3cd93.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-0.5B-Chat/96baee1a-7ea7-454f-ac8b-fe1bead3cd93.json
deleted file mode 100644
index a76aa8368083ece9b4bf4beb08b16368517f97fb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-0.5B-Chat/96baee1a-7ea7-454f-ac8b-fe1bead3cd93.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-0.5B-Chat/1762652579.835679",
- "retrieved_timestamp": "1762652579.83568",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen1.5-0.5B-Chat",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen1.5-0.5B-Chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18072713732895385
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3166662152036714
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.006797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3837083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12125997340425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.62
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-1.8B-Chat/d6107bde-875e-40f6-8471-3a3507758910.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-1.8B-Chat/d6107bde-875e-40f6-8471-3a3507758910.json
deleted file mode 100644
index 14f97dd401893e9af603b3316500c913a836b754..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-1.8B-Chat/d6107bde-875e-40f6-8471-3a3507758910.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-1.8B-Chat/1762652579.836214",
- "retrieved_timestamp": "1762652579.836215",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen1.5-1.8B-Chat",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen1.5-1.8B-Chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20190982149585324
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3255912875735599
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.019637462235649546
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42596875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18035239361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.837
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-110B-Chat/7cfcae3d-b623-4cf0-9ac8-529db46d05e6.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-110B-Chat/7cfcae3d-b623-4cf0-9ac8-529db46d05e6.json
deleted file mode 100644
index 5e6f09846542b028c666e5bc513061bc4d84df18..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-110B-Chat/7cfcae3d-b623-4cf0-9ac8-529db46d05e6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-110B-Chat/1762652579.836649",
- "retrieved_timestamp": "1762652579.836649",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen1.5-110B-Chat",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen1.5-110B-Chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5938864435254014
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6183800385588633
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23413897280966767
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3414429530201342
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45216666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48246343085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 111.21
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-14B-Chat/e2cdcc99-a1b6-43ee-9cda-2e7ccbd0ad8d.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-14B-Chat/e2cdcc99-a1b6-43ee-9cda-2e7ccbd0ad8d.json
deleted file mode 100644
index 9d8b6c0c93ba01f9536e7fb05042327d34de633b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-14B-Chat/e2cdcc99-a1b6-43ee-9cda-2e7ccbd0ad8d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-14B-Chat/1762652579.837058",
- "retrieved_timestamp": "1762652579.837059",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen1.5-14B-Chat",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen1.5-14B-Chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47680820223673187
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5228587510703555
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15256797583081572
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43997916666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36178523936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.167
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-32B-Chat/c14a0d32-1d27-4596-90d4-10a793aef9a2.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-32B-Chat/c14a0d32-1d27-4596-90d4-10a793aef9a2.json
deleted file mode 100644
index e31bf21b1cabcc206fcae24d9645e68396a9c7e7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-32B-Chat/c14a0d32-1d27-4596-90d4-10a793aef9a2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-32B-Chat/1762652579.8374798",
- "retrieved_timestamp": "1762652579.8374798",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen1.5-32B-Chat",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen1.5-32B-Chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5532199009738605
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6066899757930234
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19561933534743203
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4159791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4457280585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.512
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-4B-Chat/e3417d3e-7883-45a7-a631-9e5d105788c4.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-4B-Chat/e3417d3e-7883-45a7-a631-9e5d105788c4.json
deleted file mode 100644
index fb295ebc703f8e6ac838d2b850ce9588f53b429c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-4B-Chat/e3417d3e-7883-45a7-a631-9e5d105788c4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-4B-Chat/1762652579.837912",
- "retrieved_timestamp": "1762652579.837912",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen1.5-4B-Chat",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen1.5-4B-Chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31566576683200576
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40055485611486114
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.027945619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39778125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23961103723404256
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.95
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-7B-Chat/42e3c9e4-bf1a-43ae-87e7-056f735abe03.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-7B-Chat/42e3c9e4-bf1a-43ae-87e7-056f735abe03.json
deleted file mode 100644
index eacf6abade4df6437bf927b7ae225dd6b667ed22..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-7B-Chat/42e3c9e4-bf1a-43ae-87e7-056f735abe03.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-7B-Chat/1762652579.838321",
- "retrieved_timestamp": "1762652579.838322",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen1.5-7B-Chat",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen1.5-7B-Chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43711574178734647
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4510053116521351
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06268882175226587
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37790624999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2951296542553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.721
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-MoE-A2.7B-Chat/daec0873-964e-459e-a1a1-49da96cd17cf.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-MoE-A2.7B-Chat/daec0873-964e-459e-a1a1-49da96cd17cf.json
deleted file mode 100644
index 49af3c35cadbf31fa060c232173c65779412479a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen1.5-MoE-A2.7B-Chat/daec0873-964e-459e-a1a1-49da96cd17cf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-MoE-A2.7B-Chat/1762652579.838758",
- "retrieved_timestamp": "1762652579.838758",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen1.5-MoE-A2.7B-Chat",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen1.5-MoE-A2.7B-Chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37953851336675576
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4272088620635824
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0634441087613293
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38987499999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29230385638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2MoeForCausalLM",
- "params_billions": 14.316
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-0.5B-Instruct/6986e9f0-d008-4418-b3cb-1e870cf57e02.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-0.5B-Instruct/6986e9f0-d008-4418-b3cb-1e870cf57e02.json
deleted file mode 100644
index 1bfcb1fac7cbd4d9fc96be8ef86c4b2a2030899f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-0.5B-Instruct/6986e9f0-d008-4418-b3cb-1e870cf57e02.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-0.5B-Instruct/1762652579.839177",
- "retrieved_timestamp": "1762652579.839178",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2-0.5B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2-0.5B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22466610814860127
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31725179384863494
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.028700906344410877
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24664429530201343
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33527083333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15309175531914893
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-1.5B-Instruct/984029c7-f957-4555-8460-dfecd99f44a1.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-1.5B-Instruct/984029c7-f957-4555-8460-dfecd99f44a1.json
deleted file mode 100644
index 147eebc5c4bc09b45b7706dbeea1420346d61b02..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-1.5B-Instruct/984029c7-f957-4555-8460-dfecd99f44a1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-1.5B-Instruct/1762652579.839607",
- "retrieved_timestamp": "1762652579.839607",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2-1.5B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2-1.5B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3371232773485463
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3852232408376059
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07175226586102719
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42928125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25008311170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-57B-A14B-Instruct/50496313-dc6c-4456-8a8c-15cd8ddbb480.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-57B-A14B-Instruct/50496313-dc6c-4456-8a8c-15cd8ddbb480.json
deleted file mode 100644
index c695fade272b6b3e7ef5067a2494567c970ed9b0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-57B-A14B-Instruct/50496313-dc6c-4456-8a8c-15cd8ddbb480.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-57B-A14B-Instruct/1762652579.84003",
- "retrieved_timestamp": "1762652579.840031",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2-57B-A14B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2-57B-A14B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6337783747124297
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5887606963532052
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28172205438066467
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43613541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45752992021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2MoeForCausalLM",
- "params_billions": 57.409
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-72B-Instruct/d9ae7c35-ac71-4703-9cfe-bf5fb5aa688e.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-72B-Instruct/d9ae7c35-ac71-4703-9cfe-bf5fb5aa688e.json
deleted file mode 100644
index 4cfc17031f8898f9561764c960aa64a9379cd2df..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-72B-Instruct/d9ae7c35-ac71-4703-9cfe-bf5fb5aa688e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-72B-Instruct/1762652579.840446",
- "retrieved_timestamp": "1762652579.840447",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2-72B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2-72B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7989168738945996
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.697730968386067
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4176737160120846
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3724832214765101
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4560104166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5403091755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-7B-Instruct/3e1ebb01-6fbb-498c-af58-022f50247ec9.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-7B-Instruct/3e1ebb01-6fbb-498c-af58-022f50247ec9.json
deleted file mode 100644
index ed17e358a73d4557d460e09982adcebca06db773..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-7B-Instruct/3e1ebb01-6fbb-498c-af58-022f50247ec9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-7B-Instruct/1762652579.84092",
- "retrieved_timestamp": "1762652579.84092",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2-7B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2-7B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5679075962889577
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5544781563793189
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2764350453172205
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39279166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38472406914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-Math-72B-Instruct/1c7bb42e-aa1c-4522-a4b0-bcc460876125.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-Math-72B-Instruct/1c7bb42e-aa1c-4522-a4b0-bcc460876125.json
deleted file mode 100644
index 5bbbd45f2a43d27dc28d36eb71eab283da6822ca..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-Math-72B-Instruct/1c7bb42e-aa1c-4522-a4b0-bcc460876125.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-Math-72B-Instruct/1762652579.841145",
- "retrieved_timestamp": "1762652579.8411462",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2-Math-72B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2-Math-72B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.569381463405985
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.634337660025181
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5536253776435045
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36828859060402686
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45169791666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42727726063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-VL-72B-Instruct/2f749e28-b845-45ab-a628-8f9b6a9029d9.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-VL-72B-Instruct/2f749e28-b845-45ab-a628-8f9b6a9029d9.json
deleted file mode 100644
index e4648e325dca335fc662d9f14b067c787f2b1888..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-VL-72B-Instruct/2f749e28-b845-45ab-a628-8f9b6a9029d9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-VL-72B-Instruct/1762652579.841569",
- "retrieved_timestamp": "1762652579.8415701",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2-VL-72B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2-VL-72B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5982326892644849
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6946287292338682
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34441087613293053
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3875838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44921875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5717253989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2VLForConditionalGeneration",
- "params_billions": 73.406
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-VL-7B-Instruct/6dd0eebe-ef61-431d-bf7c-c170475bed5f.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-VL-7B-Instruct/6dd0eebe-ef61-431d-bf7c-c170475bed5f.json
deleted file mode 100644
index 888c91fe2144812cd7b2994b053ce3bc434da5af..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2-VL-7B-Instruct/6dd0eebe-ef61-431d-bf7c-c170475bed5f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-VL-7B-Instruct/1762652579.841773",
- "retrieved_timestamp": "1762652579.841774",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2-VL-7B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2-VL-7B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4599218961245052
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5464507159069989
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1986404833836858
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40949135638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2VLForConditionalGeneration",
- "params_billions": 8.291
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-0.5B-Instruct/14d1ea99-ae05-42cd-9f2f-de1a98d9846d.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-0.5B-Instruct/14d1ea99-ae05-42cd-9f2f-de1a98d9846d.json
deleted file mode 100644
index edb8fb1389b7e884ae86b90890461874a0ede874..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-0.5B-Instruct/14d1ea99-ae05-42cd-9f2f-de1a98d9846d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-0.5B-Instruct/1762652579.842413",
- "retrieved_timestamp": "1762652579.8424141",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-0.5B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-0.5B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31529120511354314
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3321916429549138
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10347432024169184
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3341875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17195811170212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-0.5B-Instruct/883755e2-69eb-459b-ae7f-5548914aa65e.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-0.5B-Instruct/883755e2-69eb-459b-ae7f-5548914aa65e.json
deleted file mode 100644
index 4879eaba0191a0401fa88ffc59e51b13c3716195..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-0.5B-Instruct/883755e2-69eb-459b-ae7f-5548914aa65e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-0.5B-Instruct/1762652579.842189",
- "retrieved_timestamp": "1762652579.84219",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-0.5B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-0.5B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.307122878407071
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3340729214937266
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33288541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16971409574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.5
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-1.5B-Instruct/9744dd76-a8cd-4400-92a7-f10b375710ae.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-1.5B-Instruct/9744dd76-a8cd-4400-92a7-f10b375710ae.json
deleted file mode 100644
index 791b75f7b5732abb1621fff89500d379d4fb934f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-1.5B-Instruct/9744dd76-a8cd-4400-92a7-f10b375710ae.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-1.5B-Instruct/1762652579.842835",
- "retrieved_timestamp": "1762652579.842836",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-1.5B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-1.5B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4475569267321817
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4288982740422907
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22054380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2558724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3663125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27992021276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.5
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-14B-Instruct-1M/52ff136b-084f-4ca3-a48e-83fb0bbd8ebc.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-14B-Instruct-1M/52ff136b-084f-4ca3-a48e-83fb0bbd8ebc.json
deleted file mode 100644
index f93c2aea9aa88a88a51dd74ecd880c94846015bb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-14B-Instruct-1M/52ff136b-084f-4ca3-a48e-83fb0bbd8ebc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-14B-Instruct-1M/1762652579.843473",
- "retrieved_timestamp": "1762652579.843473",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-14B-Instruct-1M",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-14B-Instruct-1M"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8413564896696322
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6198222551365405
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5302114803625377
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34312080536912754
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.418
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4849567819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-14B-Instruct/1f3e04ab-9f97-4eda-9d40-669eda073ac3.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-14B-Instruct/1f3e04ab-9f97-4eda-9d40-669eda073ac3.json
deleted file mode 100644
index 518abf00a6aad297a42e84952c7e3a2090264976..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-14B-Instruct/1f3e04ab-9f97-4eda-9d40-669eda073ac3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-14B-Instruct/1762652579.843263",
- "retrieved_timestamp": "1762652579.843264",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-14B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-14B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8157776920792386
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6390453705906222
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.547583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3221476510067114
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4100625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4904421542553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-32B-Instruct/c921186d-6e97-46d6-b968-894159271620.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-32B-Instruct/c921186d-6e97-46d6-b968-894159271620.json
deleted file mode 100644
index 6a91b08d6eb3cd83bb6f52982a16f1331a946df3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-32B-Instruct/c921186d-6e97-46d6-b968-894159271620.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-32B-Instruct/1762652579.843922",
- "retrieved_timestamp": "1762652579.843922",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-32B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-32B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8346121623957765
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6912525080134339
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6253776435045317
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42612500000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.566655585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-3B-Instruct/9fb4e863-fd72-4b60-bc20-e32e64ce99e8.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-3B-Instruct/9fb4e863-fd72-4b60-bc20-e32e64ce99e8.json
deleted file mode 100644
index 55f7a982841a920af1fee310eb8f4a2b7726d3cb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-3B-Instruct/9fb4e863-fd72-4b60-bc20-e32e64ce99e8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-3B-Instruct/1762652579.844352",
- "retrieved_timestamp": "1762652579.844352",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-3B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-3B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6474919879253713
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.469276665604885
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3678247734138973
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39679166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3254654255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-72B-Instruct/9ed2a831-aa5a-4e81-b8b5-397bc8b55835.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-72B-Instruct/9ed2a831-aa5a-4e81-b8b5-397bc8b55835.json
deleted file mode 100644
index 0c5cdbb69e301ad3cca2399675e58706478ba86a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-72B-Instruct/9ed2a831-aa5a-4e81-b8b5-397bc8b55835.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-72B-Instruct/1762652579.844789",
- "retrieved_timestamp": "1762652579.844789",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-72B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-72B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.863837949972739
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7272747321744824
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5981873111782477
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.375
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42060416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5625831117021277
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-7B-Instruct-1M/f338f8b3-d2fa-46e6-b2a1-b83303521b3f.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-7B-Instruct-1M/f338f8b3-d2fa-46e6-b2a1-b83303521b3f.json
deleted file mode 100644
index 8d7e125c322d3f742e7d2e3fa4eef2975fd71aef..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-7B-Instruct-1M/f338f8b3-d2fa-46e6-b2a1-b83303521b3f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-7B-Instruct-1M/1762652579.845428",
- "retrieved_timestamp": "1762652579.845428",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-7B-Instruct-1M",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-7B-Instruct-1M"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7447616767953474
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5403941270576822
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4335347432024169
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40869791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35048204787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-7B-Instruct/7a336f2b-3b33-4fde-bce6-2d1e884a1b26.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-7B-Instruct/7a336f2b-3b33-4fde-bce6-2d1e884a1b26.json
deleted file mode 100644
index d98156e9df77bb1bc057fde742dc54b04685a83c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-7B-Instruct/7a336f2b-3b33-4fde-bce6-2d1e884a1b26.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-7B-Instruct/1762652579.845207",
- "retrieved_timestamp": "1762652579.8452082",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-7B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-7B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7585251576926999
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5394231968299095
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40203125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4286901595744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-14B-Instruct/f2295cf4-86e0-4c73-8f3d-21c6e5ccd9d9.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-14B-Instruct/f2295cf4-86e0-4c73-8f3d-21c6e5ccd9d9.json
deleted file mode 100644
index 68048525ac950479592f9fb28f789a3d66a531c3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-14B-Instruct/f2295cf4-86e0-4c73-8f3d-21c6e5ccd9d9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-14B-Instruct/1762652579.846175",
- "retrieved_timestamp": "1762652579.846175",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-Coder-14B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-Coder-14B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6907560827493273
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6140296423661326
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.324773413897281
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3914583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3939494680851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-32B-Instruct/c0ca7adb-6221-415f-8ed6-0de6439db168.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-32B-Instruct/c0ca7adb-6221-415f-8ed6-0de6439db168.json
deleted file mode 100644
index 729a5f684f89e67bbd3700b239827182591d613e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-32B-Instruct/c0ca7adb-6221-415f-8ed6-0de6439db168.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-32B-Instruct/1762652579.846655",
- "retrieved_timestamp": "1762652579.846655",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-Coder-32B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-Coder-32B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7265267268625026
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6625222222405129
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4954682779456193
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.348993288590604
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4385833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44132313829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-7B-Instruct/7629f304-5235-485b-a7f6-f5a7f91fd35c.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-7B-Instruct/7629f304-5235-485b-a7f6-f5a7f91fd35c.json
deleted file mode 100644
index 4b16f461b3c1674a1a25051f4e18226e2c5087ef..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-7B-Instruct/7629f304-5235-485b-a7f6-f5a7f91fd35c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-7B-Instruct/1762652579.847122",
- "retrieved_timestamp": "1762652579.847123",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-Coder-7B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-Coder-7B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6101477413263474
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5007976986224548
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3716012084592145
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4072708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3351894946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-7B-Instruct/81749833-4f2a-4883-a789-c465c11b33b6.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-7B-Instruct/81749833-4f2a-4883-a789-c465c11b33b6.json
deleted file mode 100644
index 9823625092fa4a13e1fc0333f5bc9b41eb9a20c2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Coder-7B-Instruct/81749833-4f2a-4883-a789-c465c11b33b6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-7B-Instruct/1762652579.8473449",
- "retrieved_timestamp": "1762652579.8473458",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-Coder-7B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-Coder-7B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6147189457306613
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4999048550311305
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.030966767371601207
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4099375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33543882978723405
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Math-1.5B-Instruct/393c9602-bd87-48d7-ad95-6baf85ed3341.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Math-1.5B-Instruct/393c9602-bd87-48d7-ad95-6baf85ed3341.json
deleted file mode 100644
index 4fecfdea97658c5639af2289cfab265323cb3416..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Math-1.5B-Instruct/393c9602-bd87-48d7-ad95-6baf85ed3341.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-1.5B-Instruct/1762652579.84755",
- "retrieved_timestamp": "1762652579.84755",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-Math-1.5B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-Math-1.5B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1855731680829089
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37515353898426174
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2628398791540785
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3685416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1801030585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Math-72B-Instruct/64574dc3-4982-49c3-8526-09ebd5781175.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Math-72B-Instruct/64574dc3-4982-49c3-8526-09ebd5781175.json
deleted file mode 100644
index ee7447100ff04509dd82f7559c2ceeff17bd781b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Math-72B-Instruct/64574dc3-4982-49c3-8526-09ebd5781175.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-72B-Instruct/1762652579.847774",
- "retrieved_timestamp": "1762652579.847775",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-Math-72B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-Math-72B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4003466358151926
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6452266637803764
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6238670694864048
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44727083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4812167553191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Math-7B-Instruct/6ba8109e-8906-420f-a780-d0bef4015e1a.json b/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Math-7B-Instruct/6ba8109e-8906-420f-a780-d0bef4015e1a.json
deleted file mode 100644
index fcb9a04333201ca6111dcfbe2835780ba1d1ba93..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Qwen/Qwen_Qwen2.5-Math-7B-Instruct/6ba8109e-8906-420f-a780-d0bef4015e1a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-7B-Instruct/1762652579.848376",
- "retrieved_timestamp": "1762652579.848377",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-Math-7B-Instruct",
- "developer": "Qwen",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-Math-7B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26358395723347383
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.438762734452786
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5808157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3647291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2819980053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/RDson/RDson_WomboCombo-R1-Coder-14B-Preview/faa623a7-1bf8-4da6-b381-7701f0446b70.json b/leaderboard_data/HFOpenLLMv2/RDson/RDson_WomboCombo-R1-Coder-14B-Preview/faa623a7-1bf8-4da6-b381-7701f0446b70.json
deleted file mode 100644
index 7aced4c7c9a1eb1b56e8abe88eab76072a33c0e3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/RDson/RDson_WomboCombo-R1-Coder-14B-Preview/faa623a7-1bf8-4da6-b381-7701f0446b70.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/RDson_WomboCombo-R1-Coder-14B-Preview/1762652579.848609",
- "retrieved_timestamp": "1762652579.8486102",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "RDson/WomboCombo-R1-Coder-14B-Preview",
- "developer": "RDson",
- "inference_platform": "unknown",
- "id": "RDson/WomboCombo-R1-Coder-14B-Preview"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.628557782240012
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6392098699331132
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5989425981873112
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4843854166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5167885638297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/RLHFlow/RLHFlow_LLaMA3-iterative-DPO-final/8ccda2e0-9801-41b0-8491-eb36615860f2.json b/leaderboard_data/HFOpenLLMv2/RLHFlow/RLHFlow_LLaMA3-iterative-DPO-final/8ccda2e0-9801-41b0-8491-eb36615860f2.json
deleted file mode 100644
index 4d54c773d17c3d02c29188fea72f4aae76c08101..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/RLHFlow/RLHFlow_LLaMA3-iterative-DPO-final/8ccda2e0-9801-41b0-8491-eb36615860f2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/RLHFlow_LLaMA3-iterative-DPO-final/1762652579.849687",
- "retrieved_timestamp": "1762652579.849688",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "RLHFlow/LLaMA3-iterative-DPO-final",
- "developer": "RLHFlow",
- "inference_platform": "unknown",
- "id": "RLHFlow/LLaMA3-iterative-DPO-final"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.53401086893886
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5058257182733729
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08836858006042296
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3672708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32571476063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/RWKV/RWKV_rwkv-raven-14b/9a90826f-9062-48aa-b047-d24f4e0d85ef.json b/leaderboard_data/HFOpenLLMv2/RWKV/RWKV_rwkv-raven-14b/9a90826f-9062-48aa-b047-d24f4e0d85ef.json
deleted file mode 100644
index 912a3f2527b6607c1101286d4e68c42404f3f11e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/RWKV/RWKV_rwkv-raven-14b/9a90826f-9062-48aa-b047-d24f4e0d85ef.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/RWKV_rwkv-raven-14b/1762652579.849975",
- "retrieved_timestamp": "1762652579.849976",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "RWKV/rwkv-raven-14b",
- "developer": "RWKV",
- "inference_platform": "unknown",
- "id": "RWKV/rwkv-raven-14b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07683723631076655
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3307041176552897
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.004531722054380665
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22902684563758388
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3951458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11502659574468085
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "RwkvForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Rakuten/Rakuten_RakutenAI-2.0-mini-instruct/549f9869-4b59-469b-b9fd-ea26114405a1.json b/leaderboard_data/HFOpenLLMv2/Rakuten/Rakuten_RakutenAI-2.0-mini-instruct/549f9869-4b59-469b-b9fd-ea26114405a1.json
deleted file mode 100644
index 0b1c3d3ee192c696eb5332882d7559ff41443216..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Rakuten/Rakuten_RakutenAI-2.0-mini-instruct/549f9869-4b59-469b-b9fd-ea26114405a1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Rakuten_RakutenAI-2.0-mini-instruct/1762652579.850244",
- "retrieved_timestamp": "1762652579.850244",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Rakuten/RakutenAI-2.0-mini-instruct",
- "developer": "Rakuten",
- "inference_platform": "unknown",
- "id": "Rakuten/RakutenAI-2.0-mini-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6793906833867471
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2867197270809481
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05211480362537765
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3249166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11178523936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 1.535
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Rakuten/Rakuten_RakutenAI-7B-chat/91e22241-7b65-44b9-a437-34b56400af7a.json b/leaderboard_data/HFOpenLLMv2/Rakuten/Rakuten_RakutenAI-7B-chat/91e22241-7b65-44b9-a437-34b56400af7a.json
deleted file mode 100644
index e8814cac3114c877ee4a413fa12f1169855b5663..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Rakuten/Rakuten_RakutenAI-7B-chat/91e22241-7b65-44b9-a437-34b56400af7a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Rakuten_RakutenAI-7B-chat/1762652579.850715",
- "retrieved_timestamp": "1762652579.8507159",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Rakuten/RakutenAI-7B-chat",
- "developer": "Rakuten",
- "inference_platform": "unknown",
- "id": "Rakuten/RakutenAI-7B-chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26855521128383797
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4316204035758174
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02945619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25671140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37895833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2798371010638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.373
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Rakuten/Rakuten_RakutenAI-7B/cab9a80e-94a6-4e7b-8980-1fa4482bac8a.json b/leaderboard_data/HFOpenLLMv2/Rakuten/Rakuten_RakutenAI-7B/cab9a80e-94a6-4e7b-8980-1fa4482bac8a.json
deleted file mode 100644
index 6de5c6673dc7f9a92c9e03ed880eecb6cc93e140..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Rakuten/Rakuten_RakutenAI-7B/cab9a80e-94a6-4e7b-8980-1fa4482bac8a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Rakuten_RakutenAI-7B/1762652579.8505",
- "retrieved_timestamp": "1762652579.850501",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Rakuten/RakutenAI-7B",
- "developer": "Rakuten",
- "inference_platform": "unknown",
- "id": "Rakuten/RakutenAI-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1555971488982566
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43149052613615435
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.019637462235649546
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37381250000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28773271276595747
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.373
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_L3-Pneuma-8B/5eddb8a8-7281-4ae2-a4bc-f174598727e3.json b/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_L3-Pneuma-8B/5eddb8a8-7281-4ae2-a4bc-f174598727e3.json
deleted file mode 100644
index e7a2835a3f28784124dbc5fc55f6c8660186020f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_L3-Pneuma-8B/5eddb8a8-7281-4ae2-a4bc-f174598727e3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Replete-AI_L3-Pneuma-8B/1762652579.85093",
- "retrieved_timestamp": "1762652579.850931",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Replete-AI/L3-Pneuma-8B",
- "developer": "Replete-AI",
- "inference_platform": "unknown",
- "id": "Replete-AI/L3-Pneuma-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24132745559559746
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4908680380935449
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.054380664652567974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179530201342282
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4105208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3175698138297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_L3.1-Pneuma-8B/d20e8883-4cde-45dc-9d60-10284a2a5cdb.json b/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_L3.1-Pneuma-8B/d20e8883-4cde-45dc-9d60-10284a2a5cdb.json
deleted file mode 100644
index 88c4a6c75b6e85b6a1ea43721ba8ccc7504f53dc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_L3.1-Pneuma-8B/d20e8883-4cde-45dc-9d60-10284a2a5cdb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Replete-AI_L3.1-Pneuma-8B/1762652579.851203",
- "retrieved_timestamp": "1762652579.8512042",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Replete-AI/L3.1-Pneuma-8B",
- "developer": "Replete-AI",
- "inference_platform": "unknown",
- "id": "Replete-AI/L3.1-Pneuma-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.707642388861554
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.504990389092237
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21978851963746224
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3871145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36909906914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_Llama3-8B-Instruct-Replete-Adapted/861d8edd-2acf-4593-9768-8f77488ce8a4.json b/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_Llama3-8B-Instruct-Replete-Adapted/861d8edd-2acf-4593-9768-8f77488ce8a4.json
deleted file mode 100644
index 6722fef6a313ff3520d7d80103d7b6d373d2a69b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_Llama3-8B-Instruct-Replete-Adapted/861d8edd-2acf-4593-9768-8f77488ce8a4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Replete-AI_Llama3-8B-Instruct-Replete-Adapted/1762652579.8514109",
- "retrieved_timestamp": "1762652579.851412",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Replete-AI/Llama3-8B-Instruct-Replete-Adapted",
- "developer": "Replete-AI",
- "inference_platform": "unknown",
- "id": "Replete-AI/Llama3-8B-Instruct-Replete-Adapted"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6915306941138402
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48702618293318983
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07099697885196375
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36339583333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3390957446808511
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_Replete-Coder-Instruct-8b-Merged/398e665d-af8e-420c-95ce-5f9f4a4988af.json b/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_Replete-Coder-Instruct-8b-Merged/398e665d-af8e-420c-95ce-5f9f4a4988af.json
deleted file mode 100644
index c36eb521249f51cc54ce7bf900b932c833f6c652..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Replete-AI/Replete-AI_Replete-Coder-Instruct-8b-Merged/398e665d-af8e-420c-95ce-5f9f4a4988af.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-Coder-Instruct-8b-Merged/1762652579.851615",
- "retrieved_timestamp": "1762652579.851616",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Replete-AI/Replete-Coder-Instruct-8b-Merged",
- "developer": "Replete-AI",
- "inference_platform": "unknown",
- "id": "Replete-AI/Replete-Coder-Instruct-8b-Merged"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5387571643239937
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4461693860075828
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07779456193353475
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36603125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18051861702127658
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/RezVortex/RezVortex_JAJUKA-WEWILLNEVERFORGETYOU-3B/76f26fef-fa87-4cf5-a317-ea4b743e7432.json b/leaderboard_data/HFOpenLLMv2/RezVortex/RezVortex_JAJUKA-WEWILLNEVERFORGETYOU-3B/76f26fef-fa87-4cf5-a317-ea4b743e7432.json
deleted file mode 100644
index db4cf77b08f94e0ebc24071376a76c3aef4ae264..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/RezVortex/RezVortex_JAJUKA-WEWILLNEVERFORGETYOU-3B/76f26fef-fa87-4cf5-a317-ea4b743e7432.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/RezVortex_JAJUKA-WEWILLNEVERFORGETYOU-3B/1762652579.853197",
- "retrieved_timestamp": "1762652579.853197",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B",
- "developer": "RezVortex",
- "inference_platform": "unknown",
- "id": "RezVortex/JAJUKA-WEWILLNEVERFORGETYOU-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6858103166265509
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46189139399865614
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15483383685800603
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36302083333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3143284574468085
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/RezVortex/RezVortex_Jajuka-3b/a41d111c-dd5d-4f77-b52d-9a2dc9f31e50.json b/leaderboard_data/HFOpenLLMv2/RezVortex/RezVortex_Jajuka-3b/a41d111c-dd5d-4f77-b52d-9a2dc9f31e50.json
deleted file mode 100644
index 8ca85c344a5cbe7a36b9e88b700adce908a6086b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/RezVortex/RezVortex_Jajuka-3b/a41d111c-dd5d-4f77-b52d-9a2dc9f31e50.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/RezVortex_Jajuka-3b/1762652579.85344",
- "retrieved_timestamp": "1762652579.853441",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "RezVortex/Jajuka-3b",
- "developer": "RezVortex",
- "inference_platform": "unknown",
- "id": "RezVortex/Jajuka-3b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6925047762159957
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4593872338446621
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1593655589123867
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3670833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3137466755319149
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-7B-DARE-0/93930443-dc12-422f-9920-470917ef8d7d.json b/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-7B-DARE-0/93930443-dc12-422f-9920-470917ef8d7d.json
deleted file mode 100644
index da628214c0e3c87db352944552bdc09bf486c4a3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-7B-DARE-0/93930443-dc12-422f-9920-470917ef8d7d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-7B-DARE-0/1762652579.8536398",
- "retrieved_timestamp": "1762652579.853641",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Ro-xe/FMixIA-7B-DARE-0",
- "developer": "Ro-xe",
- "inference_platform": "unknown",
- "id": "Ro-xe/FMixIA-7B-DARE-0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3341256754300811
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5035332799973222
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.052870090634441085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45448958333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3016123670212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-7B-SLERP-27/7f08546a-3f05-4612-879c-3f293daeabd4.json b/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-7B-SLERP-27/7f08546a-3f05-4612-879c-3f293daeabd4.json
deleted file mode 100644
index 886b2725975398396c99c5536dbd12fa609d56a8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-7B-SLERP-27/7f08546a-3f05-4612-879c-3f293daeabd4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-7B-SLERP-27/1762652579.853882",
- "retrieved_timestamp": "1762652579.8538828",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Ro-xe/FMixIA-7B-SLERP-27",
- "developer": "Ro-xe",
- "inference_platform": "unknown",
- "id": "Ro-xe/FMixIA-7B-SLERP-27"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3765409114482905
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5150591725181265
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0634441087613293
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44115624999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30078125
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-7B-TIES-1/b5d64806-0d01-4c99-9ba6-6aff88c894bd.json b/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-7B-TIES-1/b5d64806-0d01-4c99-9ba6-6aff88c894bd.json
deleted file mode 100644
index a5317872a6353ee1dd15d93bb16a5558b97ae2cc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-7B-TIES-1/b5d64806-0d01-4c99-9ba6-6aff88c894bd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-7B-TIES-1/1762652579.8540852",
- "retrieved_timestamp": "1762652579.8540852",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Ro-xe/FMixIA-7B-TIES-1",
- "developer": "Ro-xe",
- "inference_platform": "unknown",
- "id": "Ro-xe/FMixIA-7B-TIES-1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34529160405501846
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5091539642456672
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05664652567975831
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28859060402684567
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46890625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2992021276595745
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-FrankenMerge-9.5B-PT-9/0d1c7e5e-4ddf-447b-9581-c62cedc2fedc.json b/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-FrankenMerge-9.5B-PT-9/0d1c7e5e-4ddf-447b-9581-c62cedc2fedc.json
deleted file mode 100644
index 9a8a5c6d54262a491440759826d5a07f272de6e9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Ro-xe/Ro-xe_FMixIA-FrankenMerge-9.5B-PT-9/0d1c7e5e-4ddf-447b-9581-c62cedc2fedc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Ro-xe_FMixIA-FrankenMerge-9.5B-PT-9/1762652579.8542862",
- "retrieved_timestamp": "1762652579.8542871",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9",
- "developer": "Ro-xe",
- "inference_platform": "unknown",
- "id": "Ro-xe/FMixIA-FrankenMerge-9.5B-PT-9"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19401632113902223
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5087851148631056
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0030211480362537764
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41703124999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36569148936170215
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.141
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/RubielLabarta/RubielLabarta_LogoS-7Bx2-MoE-13B-v0.2/63522d1e-d4bf-4071-a086-5ef016243ec1.json b/leaderboard_data/HFOpenLLMv2/RubielLabarta/RubielLabarta_LogoS-7Bx2-MoE-13B-v0.2/63522d1e-d4bf-4071-a086-5ef016243ec1.json
deleted file mode 100644
index 2e8ae1bb097989b02a3fca9a01486c7de91ab67a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/RubielLabarta/RubielLabarta_LogoS-7Bx2-MoE-13B-v0.2/63522d1e-d4bf-4071-a086-5ef016243ec1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/RubielLabarta_LogoS-7Bx2-MoE-13B-v0.2/1762652579.85476",
- "retrieved_timestamp": "1762652579.85476",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2",
- "developer": "RubielLabarta",
- "inference_platform": "unknown",
- "id": "RubielLabarta/LogoS-7Bx2-MoE-13B-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4378903531518593
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5206958722481815
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05740181268882175
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4226145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087599734042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 12.879
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_Evil-Alpaca-3B-L3.2/f9c7c5b5-6274-4971-a81a-6f88ec07ca93.json b/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_Evil-Alpaca-3B-L3.2/f9c7c5b5-6274-4971-a81a-6f88ec07ca93.json
deleted file mode 100644
index 2c7eab2779051b2205b77f4db0a97ef36556c0b7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_Evil-Alpaca-3B-L3.2/f9c7c5b5-6274-4971-a81a-6f88ec07ca93.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SaisExperiments_Evil-Alpaca-3B-L3.2/1762652579.8550148",
- "retrieved_timestamp": "1762652579.8550148",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SaisExperiments/Evil-Alpaca-3B-L3.2",
- "developer": "SaisExperiments",
- "inference_platform": "unknown",
- "id": "SaisExperiments/Evil-Alpaca-3B-L3.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32510848991786234
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4340757699220565
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0702416918429003
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4197604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2621343085106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_Gemma-2-2B-Opus-Instruct/369f84c6-022e-46ed-8cfc-2e0b4a8e175a.json b/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_Gemma-2-2B-Opus-Instruct/369f84c6-022e-46ed-8cfc-2e0b4a8e175a.json
deleted file mode 100644
index 8445c054b702b81cd8c0d7e76f9faffec9e7d404..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_Gemma-2-2B-Opus-Instruct/369f84c6-022e-46ed-8cfc-2e0b4a8e175a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SaisExperiments_Gemma-2-2B-Opus-Instruct/1762652579.855459",
- "retrieved_timestamp": "1762652579.8554602",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SaisExperiments/Gemma-2-2B-Opus-Instruct",
- "developer": "SaisExperiments",
- "inference_platform": "unknown",
- "id": "SaisExperiments/Gemma-2-2B-Opus-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.474959773401242
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4292846281445681
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05060422960725076
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4056875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2650432180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_Not-So-Small-Alpaca-24B/98275290-dbd0-462e-9028-4daa65cd5ce3.json b/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_Not-So-Small-Alpaca-24B/98275290-dbd0-462e-9028-4daa65cd5ce3.json
deleted file mode 100644
index 80591659d687351c7a6b7b7a9ae63dcb1af7b1c9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_Not-So-Small-Alpaca-24B/98275290-dbd0-462e-9028-4daa65cd5ce3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SaisExperiments_Not-So-Small-Alpaca-24B/1762652579.855924",
- "retrieved_timestamp": "1762652579.855925",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SaisExperiments/Not-So-Small-Alpaca-24B",
- "developer": "SaisExperiments",
- "inference_platform": "unknown",
- "id": "SaisExperiments/Not-So-Small-Alpaca-24B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6243611395541607
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5338637679203099
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18277945619335348
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35906040268456374
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42816666666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36943151595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 23.572
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_QwOwO-7B-V1/9064bdc6-b84b-4022-9d7a-63b1b76fc1bc.json b/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_QwOwO-7B-V1/9064bdc6-b84b-4022-9d7a-63b1b76fc1bc.json
deleted file mode 100644
index 4f71c9a34da49d41cdfc82631af3e62b94b0cb76..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SaisExperiments/SaisExperiments_QwOwO-7B-V1/9064bdc6-b84b-4022-9d7a-63b1b76fc1bc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SaisExperiments_QwOwO-7B-V1/1762652579.856126",
- "retrieved_timestamp": "1762652579.856126",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SaisExperiments/QwOwO-7B-V1",
- "developer": "SaisExperiments",
- "inference_platform": "unknown",
- "id": "SaisExperiments/QwOwO-7B-V1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45562551806983254
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5431230107025949
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3859516616314199
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38348958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42237367021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Anemoi-3B/b50b5452-b824-4fd6-b0e4-cdaea09139a2.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Anemoi-3B/b50b5452-b824-4fd6-b0e4-cdaea09139a2.json
deleted file mode 100644
index ac7cdf508140bb616e8bca4daa1b1bee8daaf90f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Anemoi-3B/b50b5452-b824-4fd6-b0e4-cdaea09139a2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Anemoi-3B/1762652579.856576",
- "retrieved_timestamp": "1762652579.856576",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Anemoi-3B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Anemoi-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3803629924156793
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4921954661921298
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17749244712990936
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43706249999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3765791223404255
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Euphrates-14B/db8c1ba2-4029-45c5-b8a6-5343356266eb.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Euphrates-14B/db8c1ba2-4029-45c5-b8a6-5343356266eb.json
deleted file mode 100644
index 2e0682aafb5588aff045146b7e61d3329c822b8e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Euphrates-14B/db8c1ba2-4029-45c5-b8a6-5343356266eb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Euphrates-14B/1762652579.856813",
- "retrieved_timestamp": "1762652579.8568141",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Euphrates-14B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Euphrates-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26468326263203856
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6137691668744961
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30513595166163143
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3934563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45157291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5255152925531915
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Magro-7B-v1.1/9e6c7958-689f-4437-b81a-c055d53ca33e.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Magro-7B-v1.1/9e6c7958-689f-4437-b81a-c055d53ca33e.json
deleted file mode 100644
index 103a4078035eda96869f0495773eb1a13450ba4c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Magro-7B-v1.1/9e6c7958-689f-4437-b81a-c055d53ca33e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Magro-7B-v1.1/1762652579.857256",
- "retrieved_timestamp": "1762652579.857256",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Magro-7B-v1.1",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Magro-7B-v1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1204016454119514
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41790625208343796
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.024924471299093656
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4433229166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27642952127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Neptuno-3B/4c2150fc-f473-4bdc-8823-960778ccbc75.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Neptuno-3B/4c2150fc-f473-4bdc-8823-960778ccbc75.json
deleted file mode 100644
index 1d612a155afa316b287eb35c26c133197950e3fa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Neptuno-3B/4c2150fc-f473-4bdc-8823-960778ccbc75.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Neptuno-3B/1762652579.857454",
- "retrieved_timestamp": "1762652579.857455",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Neptuno-3B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Neptuno-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42962229107656574
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48335808848564965
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2552870090634441
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40019791666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3773271276595745
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Neptuno-Alpha/511ac4a5-6fc8-4338-845d-859d73d57678.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Neptuno-Alpha/511ac4a5-6fc8-4338-845d-859d73d57678.json
deleted file mode 100644
index 7fd98b55665de8c96542e6c6310d65263d8cf988..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Neptuno-Alpha/511ac4a5-6fc8-4338-845d-859d73d57678.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Neptuno-Alpha/1762652579.857697",
- "retrieved_timestamp": "1762652579.857698",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Neptuno-Alpha",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Neptuno-Alpha"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3779649108809071
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49247749379461303
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18353474320241692
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43706249999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3767453457446808
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Oxyge1-33B/ee17e3a4-2036-4e57-9ada-51fe6d23ffac.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Oxyge1-33B/ee17e3a4-2036-4e57-9ada-51fe6d23ffac.json
deleted file mode 100644
index 927bf0e356ccb02013feae7a9559885de518f9a9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Oxyge1-33B/ee17e3a4-2036-4e57-9ada-51fe6d23ffac.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Oxyge1-33B/1762652579.8578959",
- "retrieved_timestamp": "1762652579.857897",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Oxyge1-33B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Oxyge1-33B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4548265269484966
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7033278292161169
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4962235649546828
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3825503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5007812500000001
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5909242021276596
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Qwen2.5-1B-Instruct/da01b31f-dde8-45dd-b793-c8258a09ddee.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Qwen2.5-1B-Instruct/da01b31f-dde8-45dd-b793-c8258a09ddee.json
deleted file mode 100644
index 9596418cfec466771a28fb3abecb9af214e51922..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Qwen2.5-1B-Instruct/da01b31f-dde8-45dd-b793-c8258a09ddee.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Qwen2.5-1B-Instruct/1762652579.858331",
- "retrieved_timestamp": "1762652579.858331",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Qwen2.5-1B-Instruct",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Qwen2.5-1B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17513198313807365
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30271528035563927
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.006042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2558724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33688541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12134308510638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.988
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-0.5B/7763650a-8a37-41f2-aadd-b1db7b41d0b3.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-0.5B/7763650a-8a37-41f2-aadd-b1db7b41d0b3.json
deleted file mode 100644
index 3a8ba2f4a0917add07c6f862dec57d1a3f81c7d0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-0.5B/7763650a-8a37-41f2-aadd-b1db7b41d0b3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJT-0.5B/1762652579.858787",
- "retrieved_timestamp": "1762652579.858787",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJT-0.5B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJT-0.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24247662867857286
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33055365550588683
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05211480362537765
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31958333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18907912234042554
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-1.5B-Alpha-1.1/e3f05df1-a653-41a0-983a-4a7d86b85c60.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-1.5B-Alpha-1.1/e3f05df1-a653-41a0-983a-4a7d86b85c60.json
deleted file mode 100644
index a99b3ddb83ab47e45625de0ff4ae9a73e94f5e75..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-1.5B-Alpha-1.1/e3f05df1-a653-41a0-983a-4a7d86b85c60.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJT-1.5B-Alpha-1.1/1762652579.859199",
- "retrieved_timestamp": "1762652579.859199",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJT-1.5B-Alpha-1.1",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJT-1.5B-Alpha-1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3439429602344003
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4243160272518483
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09592145015105741
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42391666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.296625664893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-1.5B-Alpha/21472871-fe74-447a-894c-80d77ae4ad0a.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-1.5B-Alpha/21472871-fe74-447a-894c-80d77ae4ad0a.json
deleted file mode 100644
index 0b5ea9300400c48ef08b417502a4009f1d8ccef5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-1.5B-Alpha/21472871-fe74-447a-894c-80d77ae4ad0a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJT-1.5B-Alpha/1762652579.858988",
- "retrieved_timestamp": "1762652579.858989",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJT-1.5B-Alpha",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJT-1.5B-Alpha"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3448671746521452
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4240819448548446
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09969788519637462
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4226145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961269946808511
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-1.7B/6e2f01c1-ba87-4687-9db1-a0c0004bdfe1.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-1.7B/6e2f01c1-ba87-4687-9db1-a0c0004bdfe1.json
deleted file mode 100644
index b5152c089c67c15ad02deba0d260e6c5ab5ec88d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-1.7B/6e2f01c1-ba87-4687-9db1-a0c0004bdfe1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJT-1.7B/1762652579.859416",
- "retrieved_timestamp": "1762652579.8594172",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJT-1.7B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJT-1.7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17762980004166723
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2934008926922806
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0015105740181268882
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24161073825503357
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39641666666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11328125
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.684
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-14B/1169b5fd-9418-4986-940a-276d163431c0.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-14B/1169b5fd-9418-4986-940a-276d163431c0.json
deleted file mode 100644
index 8d3c362058eecc4827e2e4267ef87b29d2245db1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-14B/1169b5fd-9418-4986-940a-276d163431c0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJT-14B/1762652579.8596292",
- "retrieved_timestamp": "1762652579.85963",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJT-14B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJT-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5494233079340594
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6536135646865123
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38444108761329304
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38674496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.476625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5380651595744681
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-2.4B/30b98827-5afb-4bfe-b765-9c81cb4580f4.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-2.4B/30b98827-5afb-4bfe-b765-9c81cb4580f4.json
deleted file mode 100644
index dc5b5c94e3ace4dbd7a3c0d68e8de79087845380..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-2.4B/30b98827-5afb-4bfe-b765-9c81cb4580f4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJT-2.4B/1762652579.859841",
- "retrieved_timestamp": "1762652579.859841",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJT-2.4B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJT-2.4B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28042039566128985
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.349012395546882
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02190332326283988
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2558724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36990624999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1858377659574468
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 2.432
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-24B-Alpha/f86649f8-8962-4496-8cd8-fed702a7e63b.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-24B-Alpha/f86649f8-8962-4496-8cd8-fed702a7e63b.json
deleted file mode 100644
index 79d30ce367ac0e90b7306c6e0eb167416dafd63d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-24B-Alpha/f86649f8-8962-4496-8cd8-fed702a7e63b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJT-24B-Alpha/1762652579.860041",
- "retrieved_timestamp": "1762652579.860041",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJT-24B-Alpha",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJT-24B-Alpha"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3206370208823699
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6080838080485248
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25302114803625375
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45947916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48570478723404253
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 24.125
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-2B-V1.1/b4e467a7-3f2d-438a-8c42-1f7da1aafd20.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-2B-V1.1/b4e467a7-3f2d-438a-8c42-1f7da1aafd20.json
deleted file mode 100644
index dc3b675e07af3f2b9a4c47edde392569f4e45382..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-2B-V1.1/b4e467a7-3f2d-438a-8c42-1f7da1aafd20.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJT-2B-V1.1/1762652579.860439",
- "retrieved_timestamp": "1762652579.860439",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJT-2B-V1.1",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJT-2B-V1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3977235956151899
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39838417813569243
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04833836858006042
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42993750000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21243351063829788
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-2B/f720d81c-04e1-4f8a-b452-ae52cc7d9fb2.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-2B/f720d81c-04e1-4f8a-b452-ae52cc7d9fb2.json
deleted file mode 100644
index 71dc83a6b3601ec51b96ebf4b30fb1f4ea3b09f1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-2B/f720d81c-04e1-4f8a-b452-ae52cc7d9fb2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJT-2B/1762652579.8602371",
- "retrieved_timestamp": "1762652579.860238",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJT-2B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJT-2B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21507378200951255
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29364597509285106
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0007552870090634441
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24161073825503357
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35641666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11868351063829788
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-3.7B/e82f1a2e-f679-47b8-9fbb-a53116e2195b.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-3.7B/e82f1a2e-f679-47b8-9fbb-a53116e2195b.json
deleted file mode 100644
index 646383ee4a0f4f9235e984ddf748dca7f56d152e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-3.7B/e82f1a2e-f679-47b8-9fbb-a53116e2195b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJT-3.7B/1762652579.860638",
- "retrieved_timestamp": "1762652579.8606389",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJT-3.7B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJT-3.7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10776184966998675
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3393045259885476
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.012084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2558724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36171875000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1505152925531915
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.783
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-4B/5115cea0-d3bf-486b-9609-36698e845653.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-4B/5115cea0-d3bf-486b-9609-36698e845653.json
deleted file mode 100644
index e8dc13ef46324169549d8f40defd568796387675..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-4B/5115cea0-d3bf-486b-9609-36698e845653.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJT-4B/1762652579.8608499",
- "retrieved_timestamp": "1762652579.860851",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJT-4B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJT-4B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4077403511571519
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4885743296577029
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11555891238670694
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4779583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.328125
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.821
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-7.5B/57934f76-c8bd-4264-a3b4-14234dda0719.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-7.5B/57934f76-c8bd-4264-a3b4-14234dda0719.json
deleted file mode 100644
index 2a10c73b36680bcbaa528fd9069b2177d86def5c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-7.5B/57934f76-c8bd-4264-a3b4-14234dda0719.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJT-7.5B/1762652579.861058",
- "retrieved_timestamp": "1762652579.861058",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJT-7.5B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJT-7.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42232831110342783
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5367364587851736
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21676737160120846
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3263422818791946
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43988541666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3951130319148936
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 7.456
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-7B-V1.1-Multilingal/03cb237a-0519-449c-b9c7-d9fbb4d119cd.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-7B-V1.1-Multilingal/03cb237a-0519-449c-b9c7-d9fbb4d119cd.json
deleted file mode 100644
index 41d4bc8e6a099045da42cffd251695823af4b46a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-7B-V1.1-Multilingal/03cb237a-0519-449c-b9c7-d9fbb4d119cd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJT-7B-V1.1-Multilingal/1762652579.861463",
- "retrieved_timestamp": "1762652579.861464",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJT-7B-V1.1-Multilingal",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJT-7B-V1.1-Multilingal"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19494053555676716
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2919597646466201
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.004531722054380665
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.362125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11369680851063829
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-7B-V1.1/b1527426-9cc0-4eb5-af52-30e36e0e04fd.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-7B-V1.1/b1527426-9cc0-4eb5-af52-30e36e0e04fd.json
deleted file mode 100644
index cea7e30753a4e9ae3a8cf7b64c4e39dc5a7c74d9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-7B-V1.1/b1527426-9cc0-4eb5-af52-30e36e0e04fd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJT-7B-V1.1/1762652579.861262",
- "retrieved_timestamp": "1762652579.861263",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJT-7B-V1.1",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJT-7B-V1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4702888336281067
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5418885259534293
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.243202416918429
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3338926174496644
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44106249999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.441156914893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-8B-V1.1/0cf37c9e-9218-4366-8065-befea0d2b749.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-8B-V1.1/0cf37c9e-9218-4366-8065-befea0d2b749.json
deleted file mode 100644
index d5d7ba32a2c5fba8608c88de6eb29cbbd303a27e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-8B-V1.1/0cf37c9e-9218-4366-8065-befea0d2b749.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJT-8B-V1.1/1762652579.8618612",
- "retrieved_timestamp": "1762652579.861862",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJT-8B-V1.1",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJT-8B-V1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4620706392372239
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5120768392487195
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20694864048338368
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33640939597315433
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4266145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4231216755319149
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 8.545
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-8B/cb136400-7d0e-4194-9a45-1646ff8cac95.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-8B/cb136400-7d0e-4194-9a45-1646ff8cac95.json
deleted file mode 100644
index 136152a74f0266944983ec8d55fa8307244b1d16..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-8B/cb136400-7d0e-4194-9a45-1646ff8cac95.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJT-8B/1762652579.861662",
- "retrieved_timestamp": "1762652579.8616629",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJT-8B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJT-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6534871917623019
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5281955607099067
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2537764350453172
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3296979865771812
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4079791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4266123670212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 8.548
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-900M/ff057dd9-0102-485d-88d7-7e50145b5f7e.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-900M/ff057dd9-0102-485d-88d7-7e50145b5f7e.json
deleted file mode 100644
index 04d3a669eb17c081a477ad5e487cc355c9646422..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-900M/ff057dd9-0102-485d-88d7-7e50145b5f7e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJT-900M/1762652579.862072",
- "retrieved_timestamp": "1762652579.8620732",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJT-900M",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJT-900M"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2410027615615456
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31692036321713823
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.013595166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35945833333333327
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11419547872340426
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.899
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-Moe2x7.5B/e95c6f08-ab57-49a2-a83b-6a77b5ab69d9.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-Moe2x7.5B/e95c6f08-ab57-49a2-a83b-6a77b5ab69d9.json
deleted file mode 100644
index 1f4af8d6739345a5d507c7441430c8ff355d5f91..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJT-Moe2x7.5B/e95c6f08-ab57-49a2-a83b-6a77b5ab69d9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJT-Moe2x7.5B/1762652579.862277",
- "retrieved_timestamp": "1762652579.862278",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJT-Moe2x7.5B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJT-Moe2x7.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41166216749336204
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5370697921185069
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21450151057401812
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3263422818791946
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43988541666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3953623670212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 13.401
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJTPass-2/7f508bd9-7f95-453d-9e96-747ce91a64b3.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJTPass-2/7f508bd9-7f95-453d-9e96-747ce91a64b3.json
deleted file mode 100644
index f591f012376fae63a98546e6a844c91937bc65b4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJTPass-2/7f508bd9-7f95-453d-9e96-747ce91a64b3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJTPass-2/1762652579.8624809",
- "retrieved_timestamp": "1762652579.8624818",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJTPass-2",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJTPass-2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24002867945939
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33022032217255354
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.052870090634441085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32225
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1901595744680851
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJTPass-4/f814a3bd-b82e-4769-9ef7-a4670420bca0.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJTPass-4/f814a3bd-b82e-4769-9ef7-a4670420bca0.json
deleted file mode 100644
index 3021202b3ddcbf467cc860ae18d4a148dec33b23..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJTPass-4/f814a3bd-b82e-4769-9ef7-a4670420bca0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJTPass-4/1762652579.8627222",
- "retrieved_timestamp": "1762652579.8627222",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJTPass-4",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJTPass-4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19129354557019818
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2963644180215358
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0022658610271903325
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38981249999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10829454787234043
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.167
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJTPass-5/5d5bda4e-8994-4cef-9772-d4bd435e9644.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJTPass-5/5d5bda4e-8994-4cef-9772-d4bd435e9644.json
deleted file mode 100644
index 034438e4db724a0259b5a4ccb6b88d56344cc7a8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SJTPass-5/5d5bda4e-8994-4cef-9772-d4bd435e9644.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SJTPass-5/1762652579.862921",
- "retrieved_timestamp": "1762652579.862922",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SJTPass-5",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SJTPass-5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24247662867857286
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31029599812555747
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015861027190332326
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3794270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13272938829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.809
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba-Passthrough-2/df1e7d22-c300-4466-92b7-770078a1dc09.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba-Passthrough-2/df1e7d22-c300-4466-92b7-770078a1dc09.json
deleted file mode 100644
index 3f68336968aa0fa0ccfbbaf5e561116a7d7a0bb3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba-Passthrough-2/df1e7d22-c300-4466-92b7-770078a1dc09.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Saba-Passthrough-2/1762652579.863117",
- "retrieved_timestamp": "1762652579.8631182",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Saba-Passthrough-2",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Saba-Passthrough-2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16913677930114318
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36724803467499195
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0007552870090634441
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3844479166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20769614361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.087
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1-1.8B/d8cc8e9e-b672-4b26-a454-f97cd7a08648.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1-1.8B/d8cc8e9e-b672-4b26-a454-f97cd7a08648.json
deleted file mode 100644
index 768acf6d257c86a97dcde30b4c9fb1d39aba88f9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1-1.8B/d8cc8e9e-b672-4b26-a454-f97cd7a08648.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Saba1-1.8B/1762652579.863334",
- "retrieved_timestamp": "1762652579.863334",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Saba1-1.8B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Saba1-1.8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3332768166243345
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4147375470428282
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1540785498489426
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4238854166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2925531914893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1-7B/1200ed26-8450-4788-a1bf-20f2c9b9b2c0.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1-7B/1200ed26-8450-4788-a1bf-20f2c9b9b2c0.json
deleted file mode 100644
index eb0224d69fc14697f421c72452dcd9e4449ce5e6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1-7B/1200ed26-8450-4788-a1bf-20f2c9b9b2c0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Saba1-7B/1762652579.863542",
- "retrieved_timestamp": "1762652579.863542",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Saba1-7B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Saba1-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45847351693506566
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5489063327459239
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36631419939577037
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47932291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43758311170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1.5-1.5B/a76090d4-a0fb-45c8-b28c-fa225ec3d11c.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1.5-1.5B/a76090d4-a0fb-45c8-b28c-fa225ec3d11c.json
deleted file mode 100644
index 49b8aef61e3db1602719460dc0ffbb298f6052b0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1.5-1.5B/a76090d4-a0fb-45c8-b28c-fa225ec3d11c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Saba1.5-1.5B/1762652579.8637571",
- "retrieved_timestamp": "1762652579.863758",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Saba1.5-1.5B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Saba1.5-1.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3332768166243345
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4147375470428282
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1540785498489426
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4238854166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2925531914893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1.5-Pro-3B/14e1dd44-92f1-4d97-be67-fa98c9802ff1.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1.5-Pro-3B/14e1dd44-92f1-4d97-be67-fa98c9802ff1.json
deleted file mode 100644
index 4d4c3ce485317df00a5921e2bf4dbfc15dfa2c1d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba1.5-Pro-3B/14e1dd44-92f1-4d97-be67-fa98c9802ff1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Saba1.5-Pro-3B/1762652579.863965",
- "retrieved_timestamp": "1762652579.863966",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Saba1.5-Pro-3B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Saba1.5-Pro-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23860468002677343
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3622910501405146
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.027190332326283987
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44054166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19581117021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 2.9
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba2-14B-Preview/e3e0180f-bbd8-491a-a41b-54801e9f71de.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba2-14B-Preview/e3e0180f-bbd8-491a-a41b-54801e9f71de.json
deleted file mode 100644
index 7e0b77da335d9b3b5e35174b3d48d9efa02e1df5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba2-14B-Preview/e3e0180f-bbd8-491a-a41b-54801e9f71de.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Saba2-14B-Preview/1762652579.864167",
- "retrieved_timestamp": "1762652579.864168",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Saba2-14B-Preview",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Saba2-14B-Preview"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4721871301480073
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.649628096691823
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31268882175226587
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3825503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4781458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5383976063829787
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba2-3B/b759686f-082e-44b6-9cf8-44a48f66c136.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba2-3B/b759686f-082e-44b6-9cf8-44a48f66c136.json
deleted file mode 100644
index ea9b643bf972cd9de786430e69bdc8639a21bca2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saba2-3B/b759686f-082e-44b6-9cf8-44a48f66c136.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Saba2-3B/1762652579.864372",
- "retrieved_timestamp": "1762652579.864373",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Saba2-3B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Saba2-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28651533486704167
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28011877359000464
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.006042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2617449664429531
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39269791666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12101063829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Sailor-japanese/8449b01f-c489-4008-97d4-aa3f0394cda4.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Sailor-japanese/8449b01f-c489-4008-97d4-aa3f0394cda4.json
deleted file mode 100644
index 61fe6c422dc38e83f9373693824817f7f76de26b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Sailor-japanese/8449b01f-c489-4008-97d4-aa3f0394cda4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Sailor-japanese/1762652579.864587",
- "retrieved_timestamp": "1762652579.864588",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Sailor-japanese",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Sailor-japanese"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16046866757979938
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2912583602962783
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0030211480362537764
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3911770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11643949468085106
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-1.5B/854baf47-af97-46dd-acfe-a3710976fd57.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-1.5B/854baf47-af97-46dd-acfe-a3710976fd57.json
deleted file mode 100644
index 7945fdd32ea6ea0615220ee5919b35f3abb23799..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-1.5B/854baf47-af97-46dd-acfe-a3710976fd57.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Saka-1.5B/1762652579.8647912",
- "retrieved_timestamp": "1762652579.8647912",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Saka-1.5B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Saka-1.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726266306732802
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3987868899865206
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08006042296072508
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37390625000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24152260638297873
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-14B/53556d59-3b32-44bc-9932-c52f05939b57.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-14B/53556d59-3b32-44bc-9932-c52f05939b57.json
deleted file mode 100644
index c5cd6c135a1f37ea42cf74c544f31d94e39ea79e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-14B/53556d59-3b32-44bc-9932-c52f05939b57.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Saka-14B/1762652579.8649821",
- "retrieved_timestamp": "1762652579.864983",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Saka-14B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Saka-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7174341857382855
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6496945295195891
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4093655589123867
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3959731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48859375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.539561170212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-24B/a5e13aa9-bf5f-4201-bc93-504521141f43.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-24B/a5e13aa9-bf5f-4201-bc93-504521141f43.json
deleted file mode 100644
index a67f288ee8fe2ac50a2d585a669622c600e0edca..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-24B/a5e13aa9-bf5f-4201-bc93-504521141f43.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Saka-24B/1762652579.865175",
- "retrieved_timestamp": "1762652579.865176",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Saka-24B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Saka-24B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38186123928952953
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6072116494463233
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18051359516616314
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3422818791946309
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45408333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4765625
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 23.572
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-7.2B/07f036d7-af59-49a8-8346-8a9a9dd21439.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-7.2B/07f036d7-af59-49a8-8346-8a9a9dd21439.json
deleted file mode 100644
index af1b8f4d156fd9181e28a0f6f0cc9b9325b96a76..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-7.2B/07f036d7-af59-49a8-8346-8a9a9dd21439.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Saka-7.2B/1762652579.86556",
- "retrieved_timestamp": "1762652579.865563",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Saka-7.2B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Saka-7.2B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1544989516704566
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2945156585364917
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23909395973154363
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37105208333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11602393617021277
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 7.292
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-7.6B/10923a84-a611-4830-b84c-0e91c0628541.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-7.6B/10923a84-a611-4830-b84c-0e91c0628541.json
deleted file mode 100644
index 5f9be49a1d7a8d8fe5999c7ff0f711bfebaa1cbe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Saka-7.6B/10923a84-a611-4830-b84c-0e91c0628541.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Saka-7.6B/1762652579.865891",
- "retrieved_timestamp": "1762652579.8658922",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Saka-7.6B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Saka-7.6B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45242844541372446
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5655284792075981
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3255287009063444
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4489375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45403922872340424
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SakaMoe-3x1.6B-Instruct/e806f2f4-0a10-49f6-a67e-dc1dd0a59ede.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SakaMoe-3x1.6B-Instruct/e806f2f4-0a10-49f6-a67e-dc1dd0a59ede.json
deleted file mode 100644
index 63079d6d2fd71c6411ffa22c52b6a1ceca8141c8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SakaMoe-3x1.6B-Instruct/e806f2f4-0a10-49f6-a67e-dc1dd0a59ede.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SakaMoe-3x1.6B-Instruct/1762652579.866188",
- "retrieved_timestamp": "1762652579.8661902",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SakaMoe-3x1.6B-Instruct",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SakaMoe-3x1.6B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23708094522533543
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.328247997224552
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.054380664652567974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33421875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18824800531914893
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2MoeForCausalLM",
- "params_billions": 1.572
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SakalFusion-7B-Alpha/2329f6f2-228a-400b-9b2d-4ad6dd278b79.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SakalFusion-7B-Alpha/2329f6f2-228a-400b-9b2d-4ad6dd278b79.json
deleted file mode 100644
index 3eb02591a37480939f2b9dbd5195d602688b4348..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SakalFusion-7B-Alpha/2329f6f2-228a-400b-9b2d-4ad6dd278b79.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SakalFusion-7B-Alpha/1762652579.866478",
- "retrieved_timestamp": "1762652579.8664792",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SakalFusion-7B-Alpha",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SakalFusion-7B-Alpha"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5289653674472622
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.559133672829116
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38444108761329304
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32550335570469796
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4581458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4473902925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SakalFusion-7B-Beta/537a91f9-b1f3-49bf-bef7-a9ef8578c284.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SakalFusion-7B-Beta/537a91f9-b1f3-49bf-bef7-a9ef8578c284.json
deleted file mode 100644
index 5687503db02db18eadf46b41a473e48b023ccafe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_SakalFusion-7B-Beta/537a91f9-b1f3-49bf-bef7-a9ef8578c284.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_SakalFusion-7B-Beta/1762652579.866734",
- "retrieved_timestamp": "1762652579.8667352",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/SakalFusion-7B-Beta",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/SakalFusion-7B-Beta"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18090222830977362
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2881298650933641
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24328859060402686
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3872083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10895944148936171
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Tara-3.8B-v1.1/cd884e16-7e4d-4d17-8bad-5819604e0384.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Tara-3.8B-v1.1/cd884e16-7e4d-4d17-8bad-5819604e0384.json
deleted file mode 100644
index 7973978eef9ff8f4ae88ffd6ee96e802d8a2eeac..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_Tara-3.8B-v1.1/cd884e16-7e4d-4d17-8bad-5819604e0384.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_Tara-3.8B-v1.1/1762652579.866961",
- "retrieved_timestamp": "1762652579.866962",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/Tara-3.8B-v1.1",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/Tara-3.8B-v1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40621661635571393
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4885743296577029
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11555891238670694
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4779583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.328125
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.821
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-1.1-3B/9da5b03b-0207-4e98-a5bf-5a658225e78f.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-1.1-3B/9da5b03b-0207-4e98-a5bf-5a658225e78f.json
deleted file mode 100644
index c894a3a746a8c76ecf8475c9135e48c15b686a65..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-1.1-3B/9da5b03b-0207-4e98-a5bf-5a658225e78f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_light-1.1-3B/1762652579.867201",
- "retrieved_timestamp": "1762652579.867202",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/light-1.1-3B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/light-1.1-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27345110972220377
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28027723572953045
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.011329305135951661
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2617449664429531
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3900625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12092752659574468
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-3B/a1593642-8d60-4680-90aa-8c3789d536d6.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-3B/a1593642-8d60-4680-90aa-8c3789d536d6.json
deleted file mode 100644
index 1fa7f3cc12778b7f166119f4a2e364ac1d505d0a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-3B/a1593642-8d60-4680-90aa-8c3789d536d6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_light-3B/1762652579.8674219",
- "retrieved_timestamp": "1762652579.867423",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/light-3B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/light-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5337360425892188
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4831034368803701
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2590634441087613
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40149999999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3774933510638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-3b-beta/2a4293ca-2434-4752-a08f-163257e0fde4.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-3b-beta/2a4293ca-2434-4752-a08f-163257e0fde4.json
deleted file mode 100644
index f77fe325e2e6149d2913d0262e2dbeb171756df9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-3b-beta/2a4293ca-2434-4752-a08f-163257e0fde4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_light-3b-beta/1762652579.867648",
- "retrieved_timestamp": "1762652579.867649",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/light-3b-beta",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/light-3b-beta"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5485489612007252
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48152297262112204
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.277190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40146875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3758311170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-7b-beta/a66efce1-f6d2-4fad-964b-cc4e80012145.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-7b-beta/a66efce1-f6d2-4fad-964b-cc4e80012145.json
deleted file mode 100644
index fd74e4de56c93411f29577eda52b763bac64e5ff..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_light-7b-beta/a66efce1-f6d2-4fad-964b-cc4e80012145.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_light-7b-beta/1762652579.867865",
- "retrieved_timestamp": "1762652579.867866",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/light-7b-beta",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/light-7b-beta"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6233870574520051
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5548193064288276
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3768882175226586
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42906249999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.445561835106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_llama-3-yanyuedao-8b-instruct/cb550de6-4cd6-411e-9426-dc12421404ad.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_llama-3-yanyuedao-8b-instruct/cb550de6-4cd6-411e-9426-dc12421404ad.json
deleted file mode 100644
index 51a2babb30ae1a3b7b82b87e78b221f2234ee332..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_llama-3-yanyuedao-8b-instruct/cb550de6-4cd6-411e-9426-dc12421404ad.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_llama-3-yanyuedao-8b-instruct/1762652579.8681011",
- "retrieved_timestamp": "1762652579.8681011",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/llama-3-yanyuedao-8b-instruct",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/llama-3-yanyuedao-8b-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21857116894284942
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43497849055247495
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03851963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41985416666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29105718085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_magro-7B/c2c87be8-4137-4bcc-8cbe-4589d193e94d.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_magro-7B/c2c87be8-4137-4bcc-8cbe-4589d193e94d.json
deleted file mode 100644
index a73d9cc96e36757600076171228ebdaed8410c10..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_magro-7B/c2c87be8-4137-4bcc-8cbe-4589d193e94d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_magro-7B/1762652579.868387",
- "retrieved_timestamp": "1762652579.8683882",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/magro-7B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/magro-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13439008497453425
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4185526485966236
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02039274924471299
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44598958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2765126329787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_mergekit-01/dd01becb-c2c0-4593-ac1e-db2ff11aa17b.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_mergekit-01/dd01becb-c2c0-4593-ac1e-db2ff11aa17b.json
deleted file mode 100644
index 003d6d32f0e2be5ef05c7a1b628d357f2363f312..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_mergekit-01/dd01becb-c2c0-4593-ac1e-db2ff11aa17b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_mergekit-01/1762652579.868608",
- "retrieved_timestamp": "1762652579.868609",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/mergekit-01",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/mergekit-01"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6233870574520051
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5548193064288276
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3768882175226586
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42906249999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.445561835106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_mergekit-della_linear-vmeykci/a4bd1768-2382-47fe-a8bd-6e42bda06d2f.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_mergekit-della_linear-vmeykci/a4bd1768-2382-47fe-a8bd-6e42bda06d2f.json
deleted file mode 100644
index b418895008a2b2c0239bf5dc7d1bfde643484623..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_mergekit-della_linear-vmeykci/a4bd1768-2382-47fe-a8bd-6e42bda06d2f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_mergekit-della_linear-vmeykci/1762652579.868854",
- "retrieved_timestamp": "1762652579.868856",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/mergekit-della_linear-vmeykci",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/mergekit-della_linear-vmeykci"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1126078804239418
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28155028620092587
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.010574018126888218
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38968749999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10887632978723404
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_model-3/efd2a4d7-afcd-4653-ad4f-7d4f7206be95.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_model-3/efd2a4d7-afcd-4653-ad4f-7d4f7206be95.json
deleted file mode 100644
index e427e1d1b3619d7156bb0751ce7835e41fe39402..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_model-3/efd2a4d7-afcd-4653-ad4f-7d4f7206be95.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_model-3/1762652579.869146",
- "retrieved_timestamp": "1762652579.869148",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/model-3",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/model-3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6263846593704703
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.554216994021922
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37084592145015105
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4263958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4454787234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_tara-3.8B/695d7b01-14e6-40e4-b398-541e87a812c8.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_tara-3.8B/695d7b01-14e6-40e4-b398-541e87a812c8.json
deleted file mode 100644
index a1fb0e1e47898bf443494118579cc5992a6ccd14..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_tara-3.8B/695d7b01-14e6-40e4-b398-541e87a812c8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_tara-3.8B/1762652579.86961",
- "retrieved_timestamp": "1762652579.869611",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/tara-3.8B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/tara-3.8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4077403511571519
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4885743296577029
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11555891238670694
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4779583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.328125
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.821
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B-v0.2/f3f888bb-5e99-4521-83b2-4e182f492220.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B-v0.2/f3f888bb-5e99-4521-83b2-4e182f492220.json
deleted file mode 100644
index 97366d5db746fe564a63e0b9cf92e99ae2f2bd7b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B-v0.2/f3f888bb-5e99-4521-83b2-4e182f492220.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B-v0.2/1762652579.870035",
- "retrieved_timestamp": "1762652579.870036",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/ultiima-14B-v0.2",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/ultiima-14B-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7069930007934502
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6472012505703305
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3995468277945619
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3825503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4793541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5387300531914894
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B-v0.3/5cd3794f-990f-4965-9fbc-7faf3216e808.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B-v0.3/5cd3794f-990f-4965-9fbc-7faf3216e808.json
deleted file mode 100644
index b88384d5d84be348ea7d40bc9e3471dfea532307..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B-v0.3/5cd3794f-990f-4965-9fbc-7faf3216e808.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B-v0.3/1762652579.870242",
- "retrieved_timestamp": "1762652579.870243",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/ultiima-14B-v0.3",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/ultiima-14B-v0.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7040452665593957
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.639820771660141
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39652567975830816
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3766778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47541666666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5336602393617021
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B-v0.4/688f9751-e261-41c6-a7a4-2dc33a702e09.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B-v0.4/688f9751-e261-41c6-a7a4-2dc33a702e09.json
deleted file mode 100644
index 9de242835791776c56e15374ade9b1d45943016b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B-v0.4/688f9751-e261-41c6-a7a4-2dc33a702e09.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B-v0.4/1762652579.8704672",
- "retrieved_timestamp": "1762652579.8704839",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/ultiima-14B-v0.4",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/ultiima-14B-v0.4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3008284684636764
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6420007859105136
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35347432024169184
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3959731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4885625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.527842420212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B/abf448a9-decf-432d-8883-6e1492a7c040.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B/abf448a9-decf-432d-8883-6e1492a7c040.json
deleted file mode 100644
index 3d84876d181c921e62596866988e684fb249a0e4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-14B/abf448a9-decf-432d-8883-6e1492a7c040.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-14B/1762652579.869824",
- "retrieved_timestamp": "1762652579.8698251",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/ultiima-14B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/ultiima-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5700563394016764
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6491153472177067
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4697885196374622
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37416107382550334
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4717604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5380651595744681
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-32B/18f686ca-453d-4a0c-9f1a-e2f4ba53399c.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-32B/18f686ca-453d-4a0c-9f1a-e2f4ba53399c.json
deleted file mode 100644
index c9cdb3fcda0de2e50dccb78a7c34e5603898bd1d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-32B/18f686ca-453d-4a0c-9f1a-e2f4ba53399c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-32B/1762652579.870782",
- "retrieved_timestamp": "1762652579.870784",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/ultiima-32B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/ultiima-32B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6854357549080883
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7037285782797875
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4962235649546828
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4994791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5910073138297872
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-72B-v1.5/258aae52-b934-4ba1-bdb0-e15bd8277234.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-72B-v1.5/258aae52-b934-4ba1-bdb0-e15bd8277234.json
deleted file mode 100644
index affa15c00db9475fcebac25afe8d505ca4b31bde..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-72B-v1.5/258aae52-b934-4ba1-bdb0-e15bd8277234.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-72B-v1.5/1762652579.8712351",
- "retrieved_timestamp": "1762652579.8712351",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/ultiima-72B-v1.5",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/ultiima-72B-v1.5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6549610588793291
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7391727188223717
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4395770392749245
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41359060402684567
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46909375000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6053856382978723
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-72B/cce8480a-353b-4f9b-8f6f-b2f1e9ae601a.json b/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-72B/cce8480a-353b-4f9b-8f6f-b2f1e9ae601a.json
deleted file mode 100644
index 518d2e5a4ad84ba22659e1d316082d43232797aa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sakalti/Sakalti_ultiima-72B/cce8480a-353b-4f9b-8f6f-b2f1e9ae601a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_ultiima-72B/1762652579.8710139",
- "retrieved_timestamp": "1762652579.8710148",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/ultiima-72B",
- "developer": "Sakalti",
- "inference_platform": "unknown",
- "id": "Sakalti/ultiima-72B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7140121544169471
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7217809739144654
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5354984894259819
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41442953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46518750000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.590591755319149
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Salesforce/Salesforce_LLaMA-3-8B-SFR-Iterative-DPO-R/1bf65062-4526-407d-ba4f-866b045dbf3b.json b/leaderboard_data/HFOpenLLMv2/Salesforce/Salesforce_LLaMA-3-8B-SFR-Iterative-DPO-R/1bf65062-4526-407d-ba4f-866b045dbf3b.json
deleted file mode 100644
index ca2ab5dc0cf98b6871e39ca85df6d2dd1b22051e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Salesforce/Salesforce_LLaMA-3-8B-SFR-Iterative-DPO-R/1bf65062-4526-407d-ba4f-866b045dbf3b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Salesforce_LLaMA-3-8B-SFR-Iterative-DPO-R/1762652579.8714519",
- "retrieved_timestamp": "1762652579.8714519",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R",
- "developer": "Salesforce",
- "inference_platform": "unknown",
- "id": "Salesforce/LLaMA-3-8B-SFR-Iterative-DPO-R"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38156203318306536
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5011950469666927
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09138972809667674
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36333333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3172373670212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SanjiWatsuki/SanjiWatsuki_Kunoichi-DPO-v2-7B/dc7243af-efa9-4169-8d31-36ef75dfe2e3.json b/leaderboard_data/HFOpenLLMv2/SanjiWatsuki/SanjiWatsuki_Kunoichi-DPO-v2-7B/dc7243af-efa9-4169-8d31-36ef75dfe2e3.json
deleted file mode 100644
index 90fa8e33c0ae7b2da0a66ddc1e9150d8e438144d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SanjiWatsuki/SanjiWatsuki_Kunoichi-DPO-v2-7B/dc7243af-efa9-4169-8d31-36ef75dfe2e3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SanjiWatsuki_Kunoichi-DPO-v2-7B/1762652579.871708",
- "retrieved_timestamp": "1762652579.871708",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SanjiWatsuki/Kunoichi-DPO-v2-7B",
- "developer": "SanjiWatsuki",
- "inference_platform": "unknown",
- "id": "SanjiWatsuki/Kunoichi-DPO-v2-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5431034100630772
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4415592450869275
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07628398791540786
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41883333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3106715425531915
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SanjiWatsuki/SanjiWatsuki_Silicon-Maid-7B/5d7ffac9-a734-44ef-aa1e-43ddbe68fd6a.json b/leaderboard_data/HFOpenLLMv2/SanjiWatsuki/SanjiWatsuki_Silicon-Maid-7B/5d7ffac9-a734-44ef-aa1e-43ddbe68fd6a.json
deleted file mode 100644
index 2a497900454dd318abab4efc3e092d93959827c6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SanjiWatsuki/SanjiWatsuki_Silicon-Maid-7B/5d7ffac9-a734-44ef-aa1e-43ddbe68fd6a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SanjiWatsuki_Silicon-Maid-7B/1762652579.87197",
- "retrieved_timestamp": "1762652579.8719711",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SanjiWatsuki/Silicon-Maid-7B",
- "developer": "SanjiWatsuki",
- "inference_platform": "unknown",
- "id": "SanjiWatsuki/Silicon-Maid-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5367835121920947
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4127972831009074
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0649546827794562
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41883333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.308344414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_70B-L3.3-Cirrus-x1/660f8ede-1b7f-4438-8a97-51db77058725.json b/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_70B-L3.3-Cirrus-x1/660f8ede-1b7f-4438-8a97-51db77058725.json
deleted file mode 100644
index a80e9f8d575747754ab1492f93851c692671527b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_70B-L3.3-Cirrus-x1/660f8ede-1b7f-4438-8a97-51db77058725.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sao10K_70B-L3.3-Cirrus-x1/1762652579.8721752",
- "retrieved_timestamp": "1762652579.8721762",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sao10K/70B-L3.3-Cirrus-x1",
- "developer": "Sao10K",
- "inference_platform": "unknown",
- "id": "Sao10K/70B-L3.3-Cirrus-x1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6680751517085777
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7028970787833794
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37386706948640486
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44966442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4841666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5378158244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_Fimbulvetr-11B-v2/135ade7c-f0d1-495a-a5b5-c95712cf0c0f.json b/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_Fimbulvetr-11B-v2/135ade7c-f0d1-495a-a5b5-c95712cf0c0f.json
deleted file mode 100644
index dd07ff86ec94eecde430de0e18f50752ff71fdbe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_Fimbulvetr-11B-v2/135ade7c-f0d1-495a-a5b5-c95712cf0c0f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sao10K_Fimbulvetr-11B-v2/1762652579.872427",
- "retrieved_timestamp": "1762652579.872428",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sao10K/Fimbulvetr-11B-v2",
- "developer": "Sao10K",
- "inference_platform": "unknown",
- "id": "Sao10K/Fimbulvetr-11B-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5100056738343152
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4544495065184342
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43536458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33011968085106386
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.732
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-70B-Euryale-v2.1/09aab7d9-93ac-4aff-840a-d4ccfb0b469d.json b/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-70B-Euryale-v2.1/09aab7d9-93ac-4aff-840a-d4ccfb0b469d.json
deleted file mode 100644
index e1507b172713577be67e307f1c826310bcccfe31..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-70B-Euryale-v2.1/09aab7d9-93ac-4aff-840a-d4ccfb0b469d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sao10K_L3-70B-Euryale-v2.1/1762652579.872639",
- "retrieved_timestamp": "1762652579.87264",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sao10K/L3-70B-Euryale-v2.1",
- "developer": "Sao10K",
- "inference_platform": "unknown",
- "id": "Sao10K/L3-70B-Euryale-v2.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7384417789243651
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6471322811268715
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21374622356495468
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42091666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5103889627659575
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-70B-Euryale-v2.1/d730a2be-1cd8-4851-9ecf-55139af1e8f7.json b/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-70B-Euryale-v2.1/d730a2be-1cd8-4851-9ecf-55139af1e8f7.json
deleted file mode 100644
index a895d5c8cf44b0927e02fab60b4c1bca24d513d2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-70B-Euryale-v2.1/d730a2be-1cd8-4851-9ecf-55139af1e8f7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sao10K_L3-70B-Euryale-v2.1/1762652579.872864",
- "retrieved_timestamp": "1762652579.872865",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sao10K/L3-70B-Euryale-v2.1",
- "developer": "Sao10K",
- "inference_platform": "unknown",
- "id": "Sao10K/L3-70B-Euryale-v2.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7281003293483512
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6502778992745041
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22432024169184292
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41958333333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5095578457446809
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Lunaris-v1/e15ed4e3-d33f-4dad-98da-e1dad098a6a1.json b/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Lunaris-v1/e15ed4e3-d33f-4dad-98da-e1dad098a6a1.json
deleted file mode 100644
index 5fc29ffb5c9f313b79d220ddde83347c27877dee..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Lunaris-v1/e15ed4e3-d33f-4dad-98da-e1dad098a6a1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Lunaris-v1/1762652579.8733618",
- "retrieved_timestamp": "1762652579.873365",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sao10K/L3-8B-Lunaris-v1",
- "developer": "Sao10K",
- "inference_platform": "unknown",
- "id": "Sao10K/L3-8B-Lunaris-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6894573066131198
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5235299282515419
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09063444108761329
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3726666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3787400265957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Niitama-v1/9c10e944-3955-4478-9d07-f79769d6b884.json b/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Niitama-v1/9c10e944-3955-4478-9d07-f79769d6b884.json
deleted file mode 100644
index 48f7be488125b013c36e307884cc497e7331bf0e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Niitama-v1/9c10e944-3955-4478-9d07-f79769d6b884.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Niitama-v1/1762652579.8737721",
- "retrieved_timestamp": "1762652579.873773",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sao10K/L3-8B-Niitama-v1",
- "developer": "Sao10K",
- "inference_platform": "unknown",
- "id": "Sao10K/L3-8B-Niitama-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6790659893526954
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5302980131787137
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09818731117824774
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3806666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3700964095744681
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Stheno-v3.2/85a94072-ac79-4c14-abaa-9a6424a03ab5.json b/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Stheno-v3.2/85a94072-ac79-4c14-abaa-9a6424a03ab5.json
deleted file mode 100644
index 2ea2067f9db87c6dcecee64d20516e6f651b3567..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Stheno-v3.2/85a94072-ac79-4c14-abaa-9a6424a03ab5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Stheno-v3.2/1762652579.8740559",
- "retrieved_timestamp": "1762652579.874058",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sao10K/L3-8B-Stheno-v3.2",
- "developer": "Sao10K",
- "inference_platform": "unknown",
- "id": "Sao10K/L3-8B-Stheno-v3.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6872841837435781
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.522778637171633
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09290030211480363
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3793645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3768284574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Stheno-v3.3-32K/279b82ae-62b2-4703-85f2-1e79e42366f0.json b/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Stheno-v3.3-32K/279b82ae-62b2-4703-85f2-1e79e42366f0.json
deleted file mode 100644
index 1a9774c9ba17250534b0e97e108cd97a77be0e0d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_L3-8B-Stheno-v3.3-32K/279b82ae-62b2-4703-85f2-1e79e42366f0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sao10K_L3-8B-Stheno-v3.3-32K/1762652579.874314",
- "retrieved_timestamp": "1762652579.874315",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sao10K/L3-8B-Stheno-v3.3-32K",
- "developer": "Sao10K",
- "inference_platform": "unknown",
- "id": "Sao10K/L3-8B-Stheno-v3.3-32K"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46037181345496614
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3844012923008206
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.014350453172205438
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25671140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3725416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1895777925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_MN-12B-Lyra-v3/2c83813a-8254-4765-9367-efb9ad8c5e6c.json b/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_MN-12B-Lyra-v3/2c83813a-8254-4765-9367-efb9ad8c5e6c.json
deleted file mode 100644
index abcea9e76ee2582086462463fdf36f336dafedb4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sao10K/Sao10K_MN-12B-Lyra-v3/2c83813a-8254-4765-9367-efb9ad8c5e6c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sao10K_MN-12B-Lyra-v3/1762652579.874634",
- "retrieved_timestamp": "1762652579.874634",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sao10K/MN-12B-Lyra-v3",
- "developer": "Sao10K",
- "inference_platform": "unknown",
- "id": "Sao10K/MN-12B-Lyra-v3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4486063644463357
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4803954360397243
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09365558912386707
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40190624999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32488364361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V1-32B/482fbdd6-6f39-4971-ac65-1e5e181b667f.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V1-32B/482fbdd6-6f39-4971-ac65-1e5e181b667f.json
deleted file mode 100644
index 74486f516a5069e8121c44ba0da09776e7f1a95a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V1-32B/482fbdd6-6f39-4971-ac65-1e5e181b667f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V1-32B/1762652579.874861",
- "retrieved_timestamp": "1762652579.8748622",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B",
- "developer": "Saxo",
- "inference_platform": "unknown",
- "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V1-32B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7971681804279312
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7000545067146033
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6027190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3624161073825503
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45378125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5792885638297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.76
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V2-32B/0b1758f7-4aee-40a2-b33e-f519107b6687.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V2-32B/0b1758f7-4aee-40a2-b33e-f519107b6687.json
deleted file mode 100644
index 4ea1f796e6eaa40dec29d51cafa2eb1bafe947f6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V2-32B/0b1758f7-4aee-40a2-b33e-f519107b6687.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V2-32B/1762652579.875268",
- "retrieved_timestamp": "1762652579.8752692",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B",
- "developer": "Saxo",
- "inference_platform": "unknown",
- "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V2-32B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7956444456264933
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7023193256341814
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5664652567975831
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41663541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5719747340425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.76
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V3-32B/b206b1c9-3469-4b77-b85a-dcd3c6394c67.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V3-32B/b206b1c9-3469-4b77-b85a-dcd3c6394c67.json
deleted file mode 100644
index 271439303135401cab63848f8d5aa17c05910073..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V3-32B/b206b1c9-3469-4b77-b85a-dcd3c6394c67.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V3-32B/1762652579.875521",
- "retrieved_timestamp": "1762652579.8755221",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B",
- "developer": "Saxo",
- "inference_platform": "unknown",
- "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V3-32B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8248702332034556
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6913199237437709
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6178247734138973
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42745833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.56640625
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V4-32B/52d4b2fe-cbd1-431f-b0e7-04ebfbe852ca.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V4-32B/52d4b2fe-cbd1-431f-b0e7-04ebfbe852ca.json
deleted file mode 100644
index 7898ccfa68b27955d128c5b8d5a87bbcbee37ee0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V4-32B/52d4b2fe-cbd1-431f-b0e7-04ebfbe852ca.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V4-32B/1762652579.87576",
- "retrieved_timestamp": "1762652579.8757608",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B",
- "developer": "Saxo",
- "inference_platform": "unknown",
- "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V4-32B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7630963620970137
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6920204096666581
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5362537764350453
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3615771812080537
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4642604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5752160904255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V5-32B/b1b0aac0-2921-44ab-ac1b-873b715e9b52.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V5-32B/b1b0aac0-2921-44ab-ac1b-873b715e9b52.json
deleted file mode 100644
index c3bd55541a13bd8e64efd4ec233021009f5ed32b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V5-32B/b1b0aac0-2921-44ab-ac1b-873b715e9b52.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V5-32B/1762652579.876068",
- "retrieved_timestamp": "1762652579.876069",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B",
- "developer": "Saxo",
- "inference_platform": "unknown",
- "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V5-32B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7515558717536137
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6928650089977083
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5460725075528701
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35570469798657717
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47086458333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5762134308510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V6-32B/977a0388-5c46-42ab-bb93-91f036963f8c.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V6-32B/977a0388-5c46-42ab-bb93-91f036963f8c.json
deleted file mode 100644
index 9680056367e9f48b017c3b702a5a13e1eed078ef..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Avengers-V6-32B/977a0388-5c46-42ab-bb93-91f036963f8c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Avengers-V6-32B/1762652579.87637",
- "retrieved_timestamp": "1762652579.876371",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B",
- "developer": "Saxo",
- "inference_platform": "unknown",
- "id": "Saxo/Linkbricks-Horizon-AI-Avengers-V6-32B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8208985491828349
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6889783858832969
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.622356495468278
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3347315436241611
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42742708333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5672373670212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.76
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/52438151-a1c8-440c-a9be-3670b18c1ef6.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/52438151-a1c8-440c-a9be-3670b18c1ef6.json
deleted file mode 100644
index 167e6457e44b1db1c97fdca9d195789b3e151088..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/52438151-a1c8-440c-a9be-3670b18c1ef6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V2-27B/1762652579.876656",
- "retrieved_timestamp": "1762652579.876657",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B",
- "developer": "Saxo",
- "inference_platform": "unknown",
- "id": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V2-27B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8145786513118525
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6463223196116569
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802114803625378
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34731543624161076
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4139375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45985704787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/993cc036-0e33-4d0e-b1b3-f97a9645f4c5.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/993cc036-0e33-4d0e-b1b3-f97a9645f4c5.json
deleted file mode 100644
index 854f40a92ca139a42572400f4522b78ab5b6b596..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/993cc036-0e33-4d0e-b1b3-f97a9645f4c5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Avengers-V3-27B/1762652579.876898",
- "retrieved_timestamp": "1762652579.876899",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B",
- "developer": "Saxo",
- "inference_platform": "unknown",
- "id": "Saxo/Linkbricks-Horizon-AI-Korean-Avengers-V3-27B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.81420408959339
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6403963618749583
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24924471299093656
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35906040268456374
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44667708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4523769946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Superb-22B/53a6fd3e-37c5-4abc-b387-0ef9f4225760.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Superb-22B/53a6fd3e-37c5-4abc-b387-0ef9f4225760.json
deleted file mode 100644
index 28e55bd6c08efd9a34074fa89a9fad8acdf8090e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Superb-22B/53a6fd3e-37c5-4abc-b387-0ef9f4225760.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Superb-22B/1762652579.877154",
- "retrieved_timestamp": "1762652579.877155",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B",
- "developer": "Saxo",
- "inference_platform": "unknown",
- "id": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-22B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6766679078179231
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5625539568927603
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23716012084592145
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3263422818791946
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3907708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3871343085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 22.247
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Superb-27B/420f358d-c7a0-4bb5-9d0a-6c44e1f2a354.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Superb-27B/420f358d-c7a0-4bb5-9d0a-6c44e1f2a354.json
deleted file mode 100644
index f9152b4a84ce274aefcc056cf4476ea3300a1844..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Korean-Superb-27B/420f358d-c7a0-4bb5-9d0a-6c44e1f2a354.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Korean-Superb-27B/1762652579.87745",
- "retrieved_timestamp": "1762652579.877451",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B",
- "developer": "Saxo",
- "inference_platform": "unknown",
- "id": "Saxo/Linkbricks-Horizon-AI-Korean-Superb-27B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7767601076255447
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6518345685119445
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2719033232628399
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3598993288590604
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47913541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4646775265957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Superb-27B/e7007251-609e-4c81-86cf-d6fb79c896c2.json b/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Superb-27B/e7007251-609e-4c81-86cf-d6fb79c896c2.json
deleted file mode 100644
index 299ea70e53d280f70b550689fffc22a58b88c5f4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Saxo/Saxo_Linkbricks-Horizon-AI-Superb-27B/e7007251-609e-4c81-86cf-d6fb79c896c2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Saxo_Linkbricks-Horizon-AI-Superb-27B/1762652579.877677",
- "retrieved_timestamp": "1762652579.8776782",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Saxo/Linkbricks-Horizon-AI-Superb-27B",
- "developer": "Saxo",
- "inference_platform": "unknown",
- "id": "Saxo/Linkbricks-Horizon-AI-Superb-27B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7302235845334822
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6186245528925046
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22205438066465258
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3573825503355705
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.465
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.406000664893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SeaLLMs/SeaLLMs_SeaLLM-7B-v2.5/7117b360-ef16-4da9-9226-b66b6aac9703.json b/leaderboard_data/HFOpenLLMv2/SeaLLMs/SeaLLMs_SeaLLM-7B-v2.5/7117b360-ef16-4da9-9226-b66b6aac9703.json
deleted file mode 100644
index 981a8a2678d7a8f25897d4c2134c2ae7ec56f4c3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SeaLLMs/SeaLLMs_SeaLLM-7B-v2.5/7117b360-ef16-4da9-9226-b66b6aac9703.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SeaLLMs_SeaLLM-7B-v2.5/1762652579.878138",
- "retrieved_timestamp": "1762652579.8781388",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SeaLLMs/SeaLLM-7B-v2.5",
- "developer": "SeaLLMs",
- "inference_platform": "unknown",
- "id": "SeaLLMs/SeaLLM-7B-v2.5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4521536190640833
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49802029594352754
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10876132930513595
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42032291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3203125
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 8.538
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SeaLLMs/SeaLLMs_SeaLLM-7B-v2/8f41a438-e9b7-43c6-b0b2-447a71ac360f.json b/leaderboard_data/HFOpenLLMv2/SeaLLMs/SeaLLMs_SeaLLM-7B-v2/8f41a438-e9b7-43c6-b0b2-447a71ac360f.json
deleted file mode 100644
index e930772fe4d15525fac687f56d3d641eb1f4f2d6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SeaLLMs/SeaLLMs_SeaLLM-7B-v2/8f41a438-e9b7-43c6-b0b2-447a71ac360f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SeaLLMs_SeaLLM-7B-v2/1762652579.877889",
- "retrieved_timestamp": "1762652579.877889",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SeaLLMs/SeaLLM-7B-v2",
- "developer": "SeaLLMs",
- "inference_platform": "unknown",
- "id": "SeaLLMs/SeaLLM-7B-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36712367629002157
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4902100795458318
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08534743202416918
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4069583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30826130319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.376
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SeaLLMs/SeaLLMs_SeaLLMs-v3-7B-Chat/f119b2b5-2303-4772-9ae0-ce8f573f86c3.json b/leaderboard_data/HFOpenLLMv2/SeaLLMs/SeaLLMs_SeaLLMs-v3-7B-Chat/f119b2b5-2303-4772-9ae0-ce8f573f86c3.json
deleted file mode 100644
index 88b6f08c87dabf4b81b1161581179d4c2041e081..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SeaLLMs/SeaLLMs_SeaLLMs-v3-7B-Chat/f119b2b5-2303-4772-9ae0-ce8f573f86c3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SeaLLMs_SeaLLMs-v3-7B-Chat/1762652579.8783438",
- "retrieved_timestamp": "1762652579.878345",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SeaLLMs/SeaLLMs-v3-7B-Chat",
- "developer": "SeaLLMs",
- "inference_platform": "unknown",
- "id": "SeaLLMs/SeaLLMs-v3-7B-Chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43766539448662883
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5266406284595359
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18580060422960726
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.417375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3894614361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SenseLLM/SenseLLM_ReflectionCoder-CL-34B/5d7a3d90-8017-4415-a1da-eb70f6145fe4.json b/leaderboard_data/HFOpenLLMv2/SenseLLM/SenseLLM_ReflectionCoder-CL-34B/5d7a3d90-8017-4415-a1da-eb70f6145fe4.json
deleted file mode 100644
index 37070d00bc7dd50de1245176b46f2c4c5f2ba988..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SenseLLM/SenseLLM_ReflectionCoder-CL-34B/5d7a3d90-8017-4415-a1da-eb70f6145fe4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SenseLLM_ReflectionCoder-CL-34B/1762652579.8785448",
- "retrieved_timestamp": "1762652579.878546",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SenseLLM/ReflectionCoder-CL-34B",
- "developer": "SenseLLM",
- "inference_platform": "unknown",
- "id": "SenseLLM/ReflectionCoder-CL-34B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4007710652180658
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39529304297033296
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03323262839879154
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25083892617449666
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41548958333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14237034574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 33.744
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SenseLLM/SenseLLM_ReflectionCoder-DS-33B/2ee4584d-b18c-44dd-af63-22c28b92e107.json b/leaderboard_data/HFOpenLLMv2/SenseLLM/SenseLLM_ReflectionCoder-DS-33B/2ee4584d-b18c-44dd-af63-22c28b92e107.json
deleted file mode 100644
index 407b904df6be80b9ea1805b930b1d180286e3fe1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SenseLLM/SenseLLM_ReflectionCoder-DS-33B/2ee4584d-b18c-44dd-af63-22c28b92e107.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SenseLLM_ReflectionCoder-DS-33B/1762652579.878793",
- "retrieved_timestamp": "1762652579.878794",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SenseLLM/ReflectionCoder-DS-33B",
- "developer": "SenseLLM",
- "inference_platform": "unknown",
- "id": "SenseLLM/ReflectionCoder-DS-33B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3786641666334215
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3449447540164568
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.030211480362537766
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3343125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12017952127659574
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 33.34
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SeppeV/SeppeV_SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/ff284b60-0c7c-4825-af77-5922831cb3b8.json b/leaderboard_data/HFOpenLLMv2/SeppeV/SeppeV_SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/ff284b60-0c7c-4825-af77-5922831cb3b8.json
deleted file mode 100644
index 6a6f303319436c009ede4e9e20be39e555c96513..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SeppeV/SeppeV_SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/ff284b60-0c7c-4825-af77-5922831cb3b8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SeppeV_SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo/1762652579.879464",
- "retrieved_timestamp": "1762652579.8794649",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo",
- "developer": "SeppeV",
- "inference_platform": "unknown",
- "id": "SeppeV/SmolLM_pretrained_with_sft_trained_with_1pc_data_on_a_preference_dpo"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09554648333089535
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3072665948660797
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.012084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40320833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11610704787234043
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sharathhebbar24/Sharathhebbar24_SSH_355M/9ff82d83-2a89-48d8-8ad0-91637a77bc76.json b/leaderboard_data/HFOpenLLMv2/Sharathhebbar24/Sharathhebbar24_SSH_355M/9ff82d83-2a89-48d8-8ad0-91637a77bc76.json
deleted file mode 100644
index 7bd9dda272077e34a48128242da02b94ceb3732b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sharathhebbar24/Sharathhebbar24_SSH_355M/9ff82d83-2a89-48d8-8ad0-91637a77bc76.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sharathhebbar24_SSH_355M/1762652579.8797262",
- "retrieved_timestamp": "1762652579.8797271",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sharathhebbar24/SSH_355M",
- "developer": "Sharathhebbar24",
- "inference_platform": "unknown",
- "id": "Sharathhebbar24/SSH_355M"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1423589409433636
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30985907344593705
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.00906344410876133
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41775
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11760305851063829
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GPT2LMHeadModel",
- "params_billions": 0.355
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Shreyash2010/Shreyash2010_Uma-4x4B-Instruct-v0.1/83fa529b-8c61-4017-92a8-ec0f46eb7bba.json b/leaderboard_data/HFOpenLLMv2/Shreyash2010/Shreyash2010_Uma-4x4B-Instruct-v0.1/83fa529b-8c61-4017-92a8-ec0f46eb7bba.json
deleted file mode 100644
index f384db25b91f77742e3538fb4370cb0f265627ea..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Shreyash2010/Shreyash2010_Uma-4x4B-Instruct-v0.1/83fa529b-8c61-4017-92a8-ec0f46eb7bba.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Shreyash2010_Uma-4x4B-Instruct-v0.1/1762652579.880244",
- "retrieved_timestamp": "1762652579.880245",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Shreyash2010/Uma-4x4B-Instruct-v0.1",
- "developer": "Shreyash2010",
- "inference_platform": "unknown",
- "id": "Shreyash2010/Uma-4x4B-Instruct-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5516961661724225
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5511602059856503
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17749244712990936
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3347315436241611
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4441041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.386968085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "?",
- "params_billions": 3.821
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sicarius-Prototyping/Sicarius-Prototyping_Micropenis_1B/1ce9038a-7f1f-4b79-9fbc-9e78660094b3.json b/leaderboard_data/HFOpenLLMv2/Sicarius-Prototyping/Sicarius-Prototyping_Micropenis_1B/1ce9038a-7f1f-4b79-9fbc-9e78660094b3.json
deleted file mode 100644
index b830d4d2bfa55eedc0a0a6d0a11f8348e6b87ac6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sicarius-Prototyping/Sicarius-Prototyping_Micropenis_1B/1ce9038a-7f1f-4b79-9fbc-9e78660094b3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sicarius-Prototyping_Micropenis_1B/1762652579.8808",
- "retrieved_timestamp": "1762652579.880801",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sicarius-Prototyping/Micropenis_1B",
- "developer": "Sicarius-Prototyping",
- "inference_platform": "unknown",
- "id": "Sicarius-Prototyping/Micropenis_1B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3460662154195313
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3372377910880025
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04607250755287009
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3325416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18600398936170212
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.618
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sicarius-Prototyping/Sicarius-Prototyping_bacon_and_food/af3374c8-5a23-4a87-990b-123803107ed8.json b/leaderboard_data/HFOpenLLMv2/Sicarius-Prototyping/Sicarius-Prototyping_bacon_and_food/af3374c8-5a23-4a87-990b-123803107ed8.json
deleted file mode 100644
index 82d0c6e359ea479997f5eccd49cf58f47704e1ed..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sicarius-Prototyping/Sicarius-Prototyping_bacon_and_food/af3374c8-5a23-4a87-990b-123803107ed8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sicarius-Prototyping_bacon_and_food/1762652579.881054",
- "retrieved_timestamp": "1762652579.881054",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sicarius-Prototyping/bacon_and_food",
- "developer": "Sicarius-Prototyping",
- "inference_platform": "unknown",
- "id": "Sicarius-Prototyping/bacon_and_food"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5860428108529812
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47245798883729967
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09818731117824774
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3883854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3262965425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_2B-ad/31fd60ef-db8f-4785-b486-7a06f1cdf981.json b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_2B-ad/31fd60ef-db8f-4785-b486-7a06f1cdf981.json
deleted file mode 100644
index 93074dbe1d72b139e6557ff1a84f7a6e9ae9b370..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_2B-ad/31fd60ef-db8f-4785-b486-7a06f1cdf981.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_2B-ad/1762652579.88126",
- "retrieved_timestamp": "1762652579.881261",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SicariusSicariiStuff/2B-ad",
- "developer": "SicariusSicariiStuff",
- "inference_platform": "unknown",
- "id": "SicariusSicariiStuff/2B-ad"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4378903531518593
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40922431523996955
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05060422960725076
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40153124999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2662067819148936
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 3.204
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_2B_or_not_2B/983cf552-1ab1-49ba-aab0-1e644e9a7acb.json b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_2B_or_not_2B/983cf552-1ab1-49ba-aab0-1e644e9a7acb.json
deleted file mode 100644
index 1f77cfec68266b9b68eb8236c12e3b1e1afdd0a8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_2B_or_not_2B/983cf552-1ab1-49ba-aab0-1e644e9a7acb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_2B_or_not_2B/1762652579.881506",
- "retrieved_timestamp": "1762652579.881506",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SicariusSicariiStuff/2B_or_not_2B",
- "developer": "SicariusSicariiStuff",
- "inference_platform": "unknown",
- "id": "SicariusSicariiStuff/2B_or_not_2B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2062316874781136
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3415917024092019
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.019637462235649546
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24748322147651006
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3790833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13987699468085107
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 2.506
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Dusk_Rainbow/e8f1d0e1-4086-4645-983b-b9470a22b522.json b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Dusk_Rainbow/e8f1d0e1-4086-4645-983b-b9470a22b522.json
deleted file mode 100644
index 8b6937a6fcaae8d1108b0387acfb67bd1fb8c83c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Dusk_Rainbow/e8f1d0e1-4086-4645-983b-b9470a22b522.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Dusk_Rainbow/1762652579.881711",
- "retrieved_timestamp": "1762652579.8817122",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SicariusSicariiStuff/Dusk_Rainbow",
- "developer": "SicariusSicariiStuff",
- "inference_platform": "unknown",
- "id": "SicariusSicariiStuff/Dusk_Rainbow"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3588057465303173
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47717504280736184
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07477341389728097
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40252083333333327
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3443317819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Eximius_Persona_5B/98406fba-a2e4-4afd-a121-e33a723d2eb6.json b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Eximius_Persona_5B/98406fba-a2e4-4afd-a121-e33a723d2eb6.json
deleted file mode 100644
index b931ebf82e359e8de97eb437e4c45cd8bb66ce1b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Eximius_Persona_5B/98406fba-a2e4-4afd-a121-e33a723d2eb6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Eximius_Persona_5B/1762652579.881908",
- "retrieved_timestamp": "1762652579.881909",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SicariusSicariiStuff/Eximius_Persona_5B",
- "developer": "SicariusSicariiStuff",
- "inference_platform": "unknown",
- "id": "SicariusSicariiStuff/Eximius_Persona_5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6559850086658954
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4511736018571028
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10196374622356495
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38181249999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31399601063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 5.821
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Impish_Mind_8B/3a0633f1-070a-416d-a7ab-f41dd44f577d.json b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Impish_Mind_8B/3a0633f1-070a-416d-a7ab-f41dd44f577d.json
deleted file mode 100644
index d208466488becaa6694cd18f01725abeb8607cd1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Impish_Mind_8B/3a0633f1-070a-416d-a7ab-f41dd44f577d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_Mind_8B/1762652579.8823712",
- "retrieved_timestamp": "1762652579.8823712",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SicariusSicariiStuff/Impish_Mind_8B",
- "developer": "SicariusSicariiStuff",
- "inference_platform": "unknown",
- "id": "SicariusSicariiStuff/Impish_Mind_8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31791424531354584
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46736571616627115
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10498489425981873
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4069583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3308676861702128
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Redemption_Wind_24B/21216e0b-dc97-4502-ba3d-d47ad1ac73b2.json b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Redemption_Wind_24B/21216e0b-dc97-4502-ba3d-d47ad1ac73b2.json
deleted file mode 100644
index a06b7f3e47a05c8fe15a443cf4e1a40e4c43e200..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Redemption_Wind_24B/21216e0b-dc97-4502-ba3d-d47ad1ac73b2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Redemption_Wind_24B/1762652579.8843782",
- "retrieved_timestamp": "1762652579.884379",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SicariusSicariiStuff/Redemption_Wind_24B",
- "developer": "SicariusSicariiStuff",
- "inference_platform": "unknown",
- "id": "SicariusSicariiStuff/Redemption_Wind_24B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25014517037017336
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.642816406969129
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18580060422960726
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38338926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4262395833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.543218085106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 23.572
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Winged_Imp_8B/dd1936aa-9b21-466d-b74a-807fafd9f24a.json b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Winged_Imp_8B/dd1936aa-9b21-466d-b74a-807fafd9f24a.json
deleted file mode 100644
index e6afc71c95f33ea931ddc0004eaa68dd5ee2019c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Winged_Imp_8B/dd1936aa-9b21-466d-b74a-807fafd9f24a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Winged_Imp_8B/1762652579.8845959",
- "retrieved_timestamp": "1762652579.884597",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SicariusSicariiStuff/Winged_Imp_8B",
- "developer": "SicariusSicariiStuff",
- "inference_platform": "unknown",
- "id": "SicariusSicariiStuff/Winged_Imp_8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.743012983328679
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5120376322048542
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12009063444108761
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41483333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3638630319148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Wingless_Imp_8B/2304646d-a399-40c0-8577-0bab9ad2ff3c.json b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Wingless_Imp_8B/2304646d-a399-40c0-8577-0bab9ad2ff3c.json
deleted file mode 100644
index 5c5fa33b2fd2075cb30c2de3a084e7c7bf7c04aa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Wingless_Imp_8B/2304646d-a399-40c0-8577-0bab9ad2ff3c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Wingless_Imp_8B/1762652579.8848069",
- "retrieved_timestamp": "1762652579.8848078",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SicariusSicariiStuff/Wingless_Imp_8B",
- "developer": "SicariusSicariiStuff",
- "inference_platform": "unknown",
- "id": "SicariusSicariiStuff/Wingless_Imp_8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.743012983328679
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5120376322048542
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12009063444108761
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41483333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3638630319148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Zion_Alpha/9d6d36b1-f8ad-4cc8-b904-c7e3b0a923e4.json b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Zion_Alpha/9d6d36b1-f8ad-4cc8-b904-c7e3b0a923e4.json
deleted file mode 100644
index c60e4aa61831acf00bf21e6aa7177cffa2a5f43c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_Zion_Alpha/9d6d36b1-f8ad-4cc8-b904-c7e3b0a923e4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Zion_Alpha/1762652579.885025",
- "retrieved_timestamp": "1762652579.885026",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SicariusSicariiStuff/Zion_Alpha",
- "developer": "SicariusSicariiStuff",
- "inference_platform": "unknown",
- "id": "SicariusSicariiStuff/Zion_Alpha"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3324024698910003
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49321099934509743
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05211480362537765
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4726875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31316489361702127
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_dn_ep02/f7f3caa2-0468-4dfb-a817-bb5cdc977911.json b/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_dn_ep02/f7f3caa2-0468-4dfb-a817-bb5cdc977911.json
deleted file mode 100644
index 8d1843bfb319c2fb1b6132263901297d6c7a6211..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SicariusSicariiStuff/SicariusSicariiStuff_dn_ep02/f7f3caa2-0468-4dfb-a817-bb5cdc977911.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_dn_ep02/1762652579.885246",
- "retrieved_timestamp": "1762652579.885247",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SicariusSicariiStuff/dn_ep02",
- "developer": "SicariusSicariiStuff",
- "inference_platform": "unknown",
- "id": "SicariusSicariiStuff/dn_ep02"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5064340394597445
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5266008759836228
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1419939577039275
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31543624161073824
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43163541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39976728723404253
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SkyOrbis/SkyOrbis_SKY-Ko-Qwen2.5-3B-Instruct/bdcf5d38-55d2-4f55-8bd1-7f4cd94f758c.json b/leaderboard_data/HFOpenLLMv2/SkyOrbis/SkyOrbis_SKY-Ko-Qwen2.5-3B-Instruct/bdcf5d38-55d2-4f55-8bd1-7f4cd94f758c.json
deleted file mode 100644
index c224bff8184e5c155aab7652e37c43c9df24f58b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SkyOrbis/SkyOrbis_SKY-Ko-Qwen2.5-3B-Instruct/bdcf5d38-55d2-4f55-8bd1-7f4cd94f758c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Qwen2.5-3B-Instruct/1762652579.887695",
- "retrieved_timestamp": "1762652579.8876958",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct",
- "developer": "SkyOrbis",
- "inference_platform": "unknown",
- "id": "SkyOrbis/SKY-Ko-Qwen2.5-3B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3534100630770799
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4264821228336018
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06948640483383686
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40236458333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28116688829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SkyOrbis/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/7875e792-80dd-4fa8-9743-b8ef42a4cdb7.json b/leaderboard_data/HFOpenLLMv2/SkyOrbis/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/7875e792-80dd-4fa8-9743-b8ef42a4cdb7.json
deleted file mode 100644
index 5c393fbac5cbab379c51e61e941d14af198f68c7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SkyOrbis/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/7875e792-80dd-4fa8-9743-b8ef42a4cdb7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000/1762652579.888021",
- "retrieved_timestamp": "1762652579.888022",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000",
- "developer": "SkyOrbis",
- "inference_platform": "unknown",
- "id": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-15000"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38188672721711725
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5077962006048589
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1865558912386707
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3271812080536913
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44360416666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3913730053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SkyOrbis/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/9354b915-68cd-47ca-a1e8-7481a8b33c49.json b/leaderboard_data/HFOpenLLMv2/SkyOrbis/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/9354b915-68cd-47ca-a1e8-7481a8b33c49.json
deleted file mode 100644
index 1fc7e1e52c5a4f2c922518f92a76e855058ad44a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SkyOrbis/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/9354b915-68cd-47ca-a1e8-7481a8b33c49.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SkyOrbis_SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000/1762652579.8882601",
- "retrieved_timestamp": "1762652579.888261",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000",
- "developer": "SkyOrbis",
- "inference_platform": "unknown",
- "id": "SkyOrbis/SKY-Ko-Qwen2.5-7B-Instruct-SFT-step-5000"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3812373391490135
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5389864554242366
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20996978851963746
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4237916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42378656914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Solshine/Solshine_Brimful-merged-replete/6523a08c-7a43-4784-9650-e1d5144fcfcf.json b/leaderboard_data/HFOpenLLMv2/Solshine/Solshine_Brimful-merged-replete/6523a08c-7a43-4784-9650-e1d5144fcfcf.json
deleted file mode 100644
index 6a47de4bb8fd01da93a6dd145e5831d38ac01621..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Solshine/Solshine_Brimful-merged-replete/6523a08c-7a43-4784-9650-e1d5144fcfcf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Solshine_Brimful-merged-replete/1762652579.8890932",
- "retrieved_timestamp": "1762652579.8890939",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Solshine/Brimful-merged-replete",
- "developer": "Solshine",
- "inference_platform": "unknown",
- "id": "Solshine/Brimful-merged-replete"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17605619755581856
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28834447696551024
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0030211480362537764
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.342125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10846077127659574
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 12.277
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sourjayon/Sourjayon_DeepSeek-R1-8b-Sify/55a6c2c7-d29e-43a2-abd6-435117967a5d.json b/leaderboard_data/HFOpenLLMv2/Sourjayon/Sourjayon_DeepSeek-R1-8b-Sify/55a6c2c7-d29e-43a2-abd6-435117967a5d.json
deleted file mode 100644
index 22ae550c02714e7d5ff54a609252841ccce25b4a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sourjayon/Sourjayon_DeepSeek-R1-8b-Sify/55a6c2c7-d29e-43a2-abd6-435117967a5d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sourjayon_DeepSeek-R1-8b-Sify/1762652579.89035",
- "retrieved_timestamp": "1762652579.890351",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sourjayon/DeepSeek-R1-8b-Sify",
- "developer": "Sourjayon",
- "inference_platform": "unknown",
- "id": "Sourjayon/DeepSeek-R1-8b-Sify"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3679481553389451
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33793580116642347
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24471299093655588
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2525167785234899
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3303125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19805518617021275
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Sourjayon/Sourjayon_DeepSeek-R1-ForumNXT/101d8dec-2e39-47d1-b76d-d91d6562feff.json b/leaderboard_data/HFOpenLLMv2/Sourjayon/Sourjayon_DeepSeek-R1-ForumNXT/101d8dec-2e39-47d1-b76d-d91d6562feff.json
deleted file mode 100644
index 74cf27e5d7378a245dcf4ea95c8db491e7dd0b4c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Sourjayon/Sourjayon_DeepSeek-R1-ForumNXT/101d8dec-2e39-47d1-b76d-d91d6562feff.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sourjayon_DeepSeek-R1-ForumNXT/1762652579.890614",
- "retrieved_timestamp": "1762652579.890615",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sourjayon/DeepSeek-R1-ForumNXT",
- "developer": "Sourjayon",
- "inference_platform": "unknown",
- "id": "Sourjayon/DeepSeek-R1-ForumNXT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26028714920854445
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3310198487331462
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25755287009063443
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3392395833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16481050531914893
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SpaceYL/SpaceYL_ECE_Poirot/32feb55a-fde5-4bbd-b93e-abffc1a7e573.json b/leaderboard_data/HFOpenLLMv2/SpaceYL/SpaceYL_ECE_Poirot/32feb55a-fde5-4bbd-b93e-abffc1a7e573.json
deleted file mode 100644
index f03de3e07ec1ef8916adda435e2f37fdd0a844bb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SpaceYL/SpaceYL_ECE_Poirot/32feb55a-fde5-4bbd-b93e-abffc1a7e573.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SpaceYL_ECE_Poirot/1762652579.890822",
- "retrieved_timestamp": "1762652579.890822",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SpaceYL/ECE_Poirot",
- "developer": "SpaceYL",
- "inference_platform": "unknown",
- "id": "SpaceYL/ECE_Poirot"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3106956209524063
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42622349736626014
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09138972809667674
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40264583333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2883144946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Spestly/Spestly_Athena-1-3B/29d6834e-38f7-472f-86be-79a8fce03989.json b/leaderboard_data/HFOpenLLMv2/Spestly/Spestly_Athena-1-3B/29d6834e-38f7-472f-86be-79a8fce03989.json
deleted file mode 100644
index 306d6037cfd8c2e371a87fc7a2de07db9530fb5e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Spestly/Spestly_Athena-1-3B/29d6834e-38f7-472f-86be-79a8fce03989.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Spestly_Athena-1-3B/1762652579.8910668",
- "retrieved_timestamp": "1762652579.891068",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Spestly/Athena-1-3B",
- "developer": "Spestly",
- "inference_platform": "unknown",
- "id": "Spestly/Athena-1-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5569167586448401
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47015477265388084
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23791540785498488
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43622916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35189494680851063
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Spestly/Spestly_Atlas-Pro-1.5B-Preview/8282705f-6b69-40c2-825d-8e0c72756083.json b/leaderboard_data/HFOpenLLMv2/Spestly/Spestly_Atlas-Pro-1.5B-Preview/8282705f-6b69-40c2-825d-8e0c72756083.json
deleted file mode 100644
index b65bf5e42cc2cafaee931bb5bdd676f633f968c8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Spestly/Spestly_Atlas-Pro-1.5B-Preview/8282705f-6b69-40c2-825d-8e0c72756083.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Spestly_Atlas-Pro-1.5B-Preview/1762652579.891309",
- "retrieved_timestamp": "1762652579.89131",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Spestly/Atlas-Pro-1.5B-Preview",
- "developer": "Spestly",
- "inference_platform": "unknown",
- "id": "Spestly/Atlas-Pro-1.5B-Preview"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2429509257658568
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.349893585329524
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31948640483383683
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3354270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1924867021276596
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Spestly/Spestly_Atlas-Pro-7B-Preview/57a36976-0868-462e-ab57-3addef7ea2f9.json b/leaderboard_data/HFOpenLLMv2/Spestly/Spestly_Atlas-Pro-7B-Preview/57a36976-0868-462e-ab57-3addef7ea2f9.json
deleted file mode 100644
index 55a5565d004e7abe17fed6dc678d99e6370ab6d0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Spestly/Spestly_Atlas-Pro-7B-Preview/57a36976-0868-462e-ab57-3addef7ea2f9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Spestly_Atlas-Pro-7B-Preview/1762652579.891519",
- "retrieved_timestamp": "1762652579.89152",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Spestly/Atlas-Pro-7B-Preview",
- "developer": "Spestly",
- "inference_platform": "unknown",
- "id": "Spestly/Atlas-Pro-7B-Preview"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31541642840995227
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46679203304308553
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5083081570996979
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.337248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3910833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2970412234042553
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Stark2008/Stark2008_GutenLaserPi/e418f7d1-8fd6-44ea-bc33-62fb525589f1.json b/leaderboard_data/HFOpenLLMv2/Stark2008/Stark2008_GutenLaserPi/e418f7d1-8fd6-44ea-bc33-62fb525589f1.json
deleted file mode 100644
index ef3a16d51efacb419c243818576e596b68c5447e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Stark2008/Stark2008_GutenLaserPi/e418f7d1-8fd6-44ea-bc33-62fb525589f1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Stark2008_GutenLaserPi/1762652579.891723",
- "retrieved_timestamp": "1762652579.891723",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Stark2008/GutenLaserPi",
- "developer": "Stark2008",
- "inference_platform": "unknown",
- "id": "Stark2008/GutenLaserPi"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42265300513747966
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5212342482489518
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07854984894259819
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4620208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31058843085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Stark2008/Stark2008_LayleleFlamPi/c12a519e-9d34-4671-8e98-c69178e08ec0.json b/leaderboard_data/HFOpenLLMv2/Stark2008/Stark2008_LayleleFlamPi/c12a519e-9d34-4671-8e98-c69178e08ec0.json
deleted file mode 100644
index eea5e7327080b40d29df05de15fc66aa3c7eef20..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Stark2008/Stark2008_LayleleFlamPi/c12a519e-9d34-4671-8e98-c69178e08ec0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Stark2008_LayleleFlamPi/1762652579.8919628",
- "retrieved_timestamp": "1762652579.891964",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Stark2008/LayleleFlamPi",
- "developer": "Stark2008",
- "inference_platform": "unknown",
- "id": "Stark2008/LayleleFlamPi"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42842325030917966
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5115654142581095
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46084375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3093417553191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Stark2008/Stark2008_VisFlamCat/ed5f857e-6799-4729-a2e5-afbea4b89ecd.json b/leaderboard_data/HFOpenLLMv2/Stark2008/Stark2008_VisFlamCat/ed5f857e-6799-4729-a2e5-afbea4b89ecd.json
deleted file mode 100644
index 110ddf110e1c8eddf795ca7decfce9f4d1a3916a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Stark2008/Stark2008_VisFlamCat/ed5f857e-6799-4729-a2e5-afbea4b89ecd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Stark2008_VisFlamCat/1762652579.892166",
- "retrieved_timestamp": "1762652579.892166",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Stark2008/VisFlamCat",
- "developer": "Stark2008",
- "inference_platform": "unknown",
- "id": "Stark2008/VisFlamCat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43659157701565177
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5216957865099948
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07628398791540786
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44627083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31441156914893614
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Steelskull/Steelskull_L3.3-MS-Nevoria-70b/5db5f87b-9bb0-4d29-b578-72bb896f3359.json b/leaderboard_data/HFOpenLLMv2/Steelskull/Steelskull_L3.3-MS-Nevoria-70b/5db5f87b-9bb0-4d29-b578-72bb896f3359.json
deleted file mode 100644
index 9ed5f258190e6c4b43b134b70f0fc752df991556..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Steelskull/Steelskull_L3.3-MS-Nevoria-70b/5db5f87b-9bb0-4d29-b578-72bb896f3359.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Steelskull_L3.3-MS-Nevoria-70b/1762652579.8924139",
- "retrieved_timestamp": "1762652579.892415",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Steelskull/L3.3-MS-Nevoria-70b",
- "developer": "Steelskull",
- "inference_platform": "unknown",
- "id": "Steelskull/L3.3-MS-Nevoria-70b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6963268571833845
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6997536580025828
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3957703927492447
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47063758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4682291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5535239361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Steelskull/Steelskull_L3.3-Nevoria-R1-70b/1465ebc9-f2c3-46df-b5e1-37e7a027fde8.json b/leaderboard_data/HFOpenLLMv2/Steelskull/Steelskull_L3.3-Nevoria-R1-70b/1465ebc9-f2c3-46df-b5e1-37e7a027fde8.json
deleted file mode 100644
index ea033a79a766e63624658b69fb3118c469253925..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Steelskull/Steelskull_L3.3-Nevoria-R1-70b/1465ebc9-f2c3-46df-b5e1-37e7a027fde8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Steelskull_L3.3-Nevoria-R1-70b/1762652579.892649",
- "retrieved_timestamp": "1762652579.89265",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Steelskull/L3.3-Nevoria-R1-70b",
- "developer": "Steelskull",
- "inference_platform": "unknown",
- "id": "Steelskull/L3.3-Nevoria-R1-70b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6023794642659255
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6971668662651651
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46299093655589124
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46895973154362414
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47753125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5462932180851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/StelleX/StelleX_Vorisatex-7B-preview/875156be-2ff9-4ec4-8085-27f22fb19259.json b/leaderboard_data/HFOpenLLMv2/StelleX/StelleX_Vorisatex-7B-preview/875156be-2ff9-4ec4-8085-27f22fb19259.json
deleted file mode 100644
index a66bd76cdd1cbfe8a4b74de6fff078ee2ef16fe0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/StelleX/StelleX_Vorisatex-7B-preview/875156be-2ff9-4ec4-8085-27f22fb19259.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/StelleX_Vorisatex-7B-preview/1762652579.893095",
- "retrieved_timestamp": "1762652579.893096",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "StelleX/Vorisatex-7B-preview",
- "developer": "StelleX",
- "inference_platform": "unknown",
- "id": "StelleX/Vorisatex-7B-preview"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1515013497519914
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3111695757290421
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.028700906344410877
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2516778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41923958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11660571808510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SultanR/SultanR_SmolTulu-1.7b-Instruct/1b0bd686-fd26-441f-b280-97b10bb1449c.json b/leaderboard_data/HFOpenLLMv2/SultanR/SultanR_SmolTulu-1.7b-Instruct/1b0bd686-fd26-441f-b280-97b10bb1449c.json
deleted file mode 100644
index 83c7ff8de80cd87232494b0b34f5cc3510d2d4d2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SultanR/SultanR_SmolTulu-1.7b-Instruct/1b0bd686-fd26-441f-b280-97b10bb1449c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SultanR_SmolTulu-1.7b-Instruct/1762652579.893334",
- "retrieved_timestamp": "1762652579.893334",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SultanR/SmolTulu-1.7b-Instruct",
- "developer": "SultanR",
- "inference_platform": "unknown",
- "id": "SultanR/SmolTulu-1.7b-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6540867121459949
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3713086260572204
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07930513595166164
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35403125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17104388297872342
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.711
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SultanR/SultanR_SmolTulu-1.7b-Reinforced/224b4cbc-e36c-4f68-9918-edbdaf947191.json b/leaderboard_data/HFOpenLLMv2/SultanR/SultanR_SmolTulu-1.7b-Reinforced/224b4cbc-e36c-4f68-9918-edbdaf947191.json
deleted file mode 100644
index b559221a06efe4f319389da0b1e64d995186a2d9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SultanR/SultanR_SmolTulu-1.7b-Reinforced/224b4cbc-e36c-4f68-9918-edbdaf947191.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SultanR_SmolTulu-1.7b-Reinforced/1762652579.893585",
- "retrieved_timestamp": "1762652579.893586",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SultanR/SmolTulu-1.7b-Reinforced",
- "developer": "SultanR",
- "inference_platform": "unknown",
- "id": "SultanR/SmolTulu-1.7b-Reinforced"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6790659893526954
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3551868188444029
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07175226586102719
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34060416666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17627992021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.711
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/SultanR/SultanR_SmolTulu-1.7b-it-v0/22ea218a-e3be-4e05-9a94-af716bb3a624.json b/leaderboard_data/HFOpenLLMv2/SultanR/SultanR_SmolTulu-1.7b-it-v0/22ea218a-e3be-4e05-9a94-af716bb3a624.json
deleted file mode 100644
index f60ab4a0e241f6cfe9976e5706d03ab491078cfe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/SultanR/SultanR_SmolTulu-1.7b-it-v0/22ea218a-e3be-4e05-9a94-af716bb3a624.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SultanR_SmolTulu-1.7b-it-v0/1762652579.8938031",
- "retrieved_timestamp": "1762652579.8938031",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SultanR/SmolTulu-1.7b-it-v0",
- "developer": "SultanR",
- "inference_platform": "unknown",
- "id": "SultanR/SmolTulu-1.7b-it-v0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6540867121459949
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3713086260572204
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07930513595166164
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35403125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17104388297872342
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.711
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBA-123/a469604f-f755-46e0-8b1c-db4a365dec34.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBA-123/a469604f-f755-46e0-8b1c-db4a365dec34.json
deleted file mode 100644
index e276ae9a6400cb11a4b49d6561eb4032e10ffa34..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBA-123/a469604f-f755-46e0-8b1c-db4a365dec34.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Supichi_BBA-123/1762652579.894015",
- "retrieved_timestamp": "1762652579.894016",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Supichi/BBA-123",
- "developer": "Supichi",
- "inference_platform": "unknown",
- "id": "Supichi/BBA-123"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2079548930171944
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2920111436321769
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34990625000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11668882978723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 17.161
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBA99/fa793cb5-5522-4777-8d6f-e4719a51f767.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBA99/fa793cb5-5522-4777-8d6f-e4719a51f767.json
deleted file mode 100644
index 5d13813d0abeb69a0acdfda4dc7fc4acbf9b4026..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBA99/fa793cb5-5522-4777-8d6f-e4719a51f767.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Supichi_BBA99/1762652579.8942661",
- "retrieved_timestamp": "1762652579.8942661",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Supichi/BBA99",
- "developer": "Supichi",
- "inference_platform": "unknown",
- "id": "Supichi/BBA99"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14066011516110588
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2768958340020912
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32184375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11120345744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 17.161
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAIK29/de5f2ab9-f1d2-49bc-9771-41b9da1bdfa3.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAIK29/de5f2ab9-f1d2-49bc-9771-41b9da1bdfa3.json
deleted file mode 100644
index 59abdbd1db426a1b3f07ea6420672c42c3b23d20..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAIK29/de5f2ab9-f1d2-49bc-9771-41b9da1bdfa3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Supichi_BBAIK29/1762652579.89447",
- "retrieved_timestamp": "1762652579.894471",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Supichi/BBAIK29",
- "developer": "Supichi",
- "inference_platform": "unknown",
- "id": "Supichi/BBAIK29"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45884807865352817
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5589641249478369
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3678247734138973
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45008333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4468916223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_250_Xia0_gZ/068a06f4-3fdc-495f-b7e4-0effebe24e42.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_250_Xia0_gZ/068a06f4-3fdc-495f-b7e4-0effebe24e42.json
deleted file mode 100644
index 2fa4d38cd01ac80dbf957ad41e561e65701ebe0b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_250_Xia0_gZ/068a06f4-3fdc-495f-b7e4-0effebe24e42.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Supichi_BBAI_250_Xia0_gZ/1762652579.894933",
- "retrieved_timestamp": "1762652579.894933",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Supichi/BBAI_250_Xia0_gZ",
- "developer": "Supichi",
- "inference_platform": "unknown",
- "id": "Supichi/BBAI_250_Xia0_gZ"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4685401401614383
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5567682997527722
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3640483383685801
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4579270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4464760638297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_275_Tsunami_gZ/173028b9-03e3-44d7-a7e9-2c0c5c6f4b4e.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_275_Tsunami_gZ/173028b9-03e3-44d7-a7e9-2c0c5c6f4b4e.json
deleted file mode 100644
index aad977057fc5952a3cf473dcbc42fb4ef324caf2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_275_Tsunami_gZ/173028b9-03e3-44d7-a7e9-2c0c5c6f4b4e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Supichi_BBAI_275_Tsunami_gZ/1762652579.895135",
- "retrieved_timestamp": "1762652579.895135",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Supichi/BBAI_275_Tsunami_gZ",
- "developer": "Supichi",
- "inference_platform": "unknown",
- "id": "Supichi/BBAI_275_Tsunami_gZ"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5369586031729146
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5531259476127334
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3285498489425982
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44478124999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44921875
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_525_Tsu_gZ_Xia0/6b6b273e-9cf0-405e-b1e4-5fdbd2ae16d9.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_525_Tsu_gZ_Xia0/6b6b273e-9cf0-405e-b1e4-5fdbd2ae16d9.json
deleted file mode 100644
index 3baa32beb06e881fd252be60f888f7b36e7818aa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_525_Tsu_gZ_Xia0/6b6b273e-9cf0-405e-b1e4-5fdbd2ae16d9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Supichi_BBAI_525_Tsu_gZ_Xia0/1762652579.8953412",
- "retrieved_timestamp": "1762652579.8953412",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Supichi/BBAI_525_Tsu_gZ_Xia0",
- "developer": "Supichi",
- "inference_platform": "unknown",
- "id": "Supichi/BBAI_525_Tsu_gZ_Xia0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5338612658856279
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5561933633430705
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3429003021148036
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44744791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44772273936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_78B_Calme_3_1_Ties/a9c4a482-6b02-4cf6-a7d5-3e16334df634.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_78B_Calme_3_1_Ties/a9c4a482-6b02-4cf6-a7d5-3e16334df634.json
deleted file mode 100644
index 68c62d73ac2c6d5f8f6f014a0823b705cf6003af..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_78B_Calme_3_1_Ties/a9c4a482-6b02-4cf6-a7d5-3e16334df634.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Supichi_BBAI_78B_Calme_3_1_Ties/1762652579.895541",
- "retrieved_timestamp": "1762652579.895541",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Supichi/BBAI_78B_Calme_3_1_Ties",
- "developer": "Supichi",
- "inference_platform": "unknown",
- "id": "Supichi/BBAI_78B_Calme_3_1_Ties"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18280052482967415
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28281264175951776
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22902684563758388
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30996874999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11436170212765957
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 27.06
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_QWEEN_V000000_LUMEN_14B/57fd3fdc-dfdd-44ee-8c30-dc5ce4a0df8d.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_QWEEN_V000000_LUMEN_14B/57fd3fdc-dfdd-44ee-8c30-dc5ce4a0df8d.json
deleted file mode 100644
index 7fd74fb8160b747e706f832479f0a117bb583d0a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_BBAI_QWEEN_V000000_LUMEN_14B/57fd3fdc-dfdd-44ee-8c30-dc5ce4a0df8d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Supichi_BBAI_QWEEN_V000000_LUMEN_14B/1762652579.895749",
- "retrieved_timestamp": "1762652579.8957498",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Supichi/BBAI_QWEEN_V000000_LUMEN_14B",
- "developer": "Supichi",
- "inference_platform": "unknown",
- "id": "Supichi/BBAI_QWEEN_V000000_LUMEN_14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18145188100905596
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22972580681005383
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23154362416107382
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3445416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11602393617021277
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 10.366
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_HF_TOKEN/cd0ccaff-e1b3-4c11-a8a0-37137d0386e2.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_HF_TOKEN/cd0ccaff-e1b3-4c11-a8a0-37137d0386e2.json
deleted file mode 100644
index 40c0d35dff5b1a20ae66812b58ef19308447d255..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_HF_TOKEN/cd0ccaff-e1b3-4c11-a8a0-37137d0386e2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Supichi_HF_TOKEN/1762652579.895958",
- "retrieved_timestamp": "1762652579.895958",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Supichi/HF_TOKEN",
- "developer": "Supichi",
- "inference_platform": "unknown",
- "id": "Supichi/HF_TOKEN"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1379872072766925
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2763924734767205
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0007552870090634441
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32717708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11095412234042554
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 17.161
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_NJS26/f336c7ee-2275-4045-a227-1a7abbaebf63.json b/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_NJS26/f336c7ee-2275-4045-a227-1a7abbaebf63.json
deleted file mode 100644
index 9f4e574f6fdc2e13f920f7c2dd76ce41dddf8a00..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Supichi/Supichi_NJS26/f336c7ee-2275-4045-a227-1a7abbaebf63.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Supichi_NJS26/1762652579.8961651",
- "retrieved_timestamp": "1762652579.8961651",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Supichi/NJS26",
- "developer": "Supichi",
- "inference_platform": "unknown",
- "id": "Supichi/NJS26"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04481331755298164
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4780152929488641
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0324773413897281
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179530201342282
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38540625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036901595744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Svak/Svak_MN-12B-Inferor-v0.0/5bb52ed5-e59a-4e60-a6eb-9e9322d95ccc.json b/leaderboard_data/HFOpenLLMv2/Svak/Svak_MN-12B-Inferor-v0.0/5bb52ed5-e59a-4e60-a6eb-9e9322d95ccc.json
deleted file mode 100644
index f8e837b584b6ded45f933108873b36f73e82af88..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Svak/Svak_MN-12B-Inferor-v0.0/5bb52ed5-e59a-4e60-a6eb-9e9322d95ccc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Svak_MN-12B-Inferor-v0.0/1762652579.896373",
- "retrieved_timestamp": "1762652579.896374",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Svak/MN-12B-Inferor-v0.0",
- "developer": "Svak",
- "inference_platform": "unknown",
- "id": "Svak/MN-12B-Inferor-v0.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5707555951541909
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5195010930589931
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10196374622356495
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46388541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3558843085106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Svak/Svak_MN-12B-Inferor-v0.1/9bfe838e-a568-4933-b03d-3e9ae6d2026d.json b/leaderboard_data/HFOpenLLMv2/Svak/Svak_MN-12B-Inferor-v0.1/9bfe838e-a568-4933-b03d-3e9ae6d2026d.json
deleted file mode 100644
index 17d95df798a8f6d7901266b262271c61689040f8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Svak/Svak_MN-12B-Inferor-v0.1/9bfe838e-a568-4933-b03d-3e9ae6d2026d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Svak_MN-12B-Inferor-v0.1/1762652579.8966348",
- "retrieved_timestamp": "1762652579.896636",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Svak/MN-12B-Inferor-v0.1",
- "developer": "Svak",
- "inference_platform": "unknown",
- "id": "Svak/MN-12B-Inferor-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6346527214457639
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5146762089838804
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12613293051359517
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32550335570469796
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4350833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3661901595744681
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Syed-Hasan-8503/Syed-Hasan-8503_Phi-3-mini-4K-instruct-cpo-simpo/58bacacb-2936-4685-b0ba-dc8f47f3232a.json b/leaderboard_data/HFOpenLLMv2/Syed-Hasan-8503/Syed-Hasan-8503_Phi-3-mini-4K-instruct-cpo-simpo/58bacacb-2936-4685-b0ba-dc8f47f3232a.json
deleted file mode 100644
index 689bfdbd01d4b7882022fd0cd6680c3b0e905e72..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Syed-Hasan-8503/Syed-Hasan-8503_Phi-3-mini-4K-instruct-cpo-simpo/58bacacb-2936-4685-b0ba-dc8f47f3232a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Syed-Hasan-8503_Phi-3-mini-4K-instruct-cpo-simpo/1762652579.896852",
- "retrieved_timestamp": "1762652579.896853",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo",
- "developer": "Syed-Hasan-8503",
- "inference_platform": "unknown",
- "id": "Syed-Hasan-8503/Phi-3-mini-4K-instruct-cpo-simpo"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5714049832222946
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5681534123661078
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15709969788519637
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3963541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38605385638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 3.821
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V1-P1/5bedfdac-2976-4a21-9ae2-a5b5b06e1e14.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V1-P1/5bedfdac-2976-4a21-9ae2-a5b5b06e1e14.json
deleted file mode 100644
index 2e6246333c67893c7c3b8fab9492965b99e7331a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V1-P1/5bedfdac-2976-4a21-9ae2-a5b5b06e1e14.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V1-P1/1762652579.897121",
- "retrieved_timestamp": "1762652579.8971221",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/KRONOS-8B-V1-P1",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/KRONOS-8B-V1-P1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7849783020164276
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.508544756293663
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19788519637462235
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3881041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3759973404255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V1-P2/a5d0fc39-cac5-409f-8375-636ef97fba8c.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V1-P2/a5d0fc39-cac5-409f-8375-636ef97fba8c.json
deleted file mode 100644
index 87d83ff5a170f899b5de5d0d388acfe688c8b5f2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V1-P2/a5d0fc39-cac5-409f-8375-636ef97fba8c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V1-P2/1762652579.897378",
- "retrieved_timestamp": "1762652579.8973792",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/KRONOS-8B-V1-P2",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/KRONOS-8B-V1-P2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6724213974476612
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47717566218002166
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16012084592145015
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3567604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3453291223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V1-P3/14eb1867-80a0-47f9-9b2a-f0a05f683fb4.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V1-P3/14eb1867-80a0-47f9-9b2a-f0a05f683fb4.json
deleted file mode 100644
index 4d0af8ffd6f593da390111228951bb47a785ef13..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V1-P3/14eb1867-80a0-47f9-9b2a-f0a05f683fb4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V1-P3/1762652579.897578",
- "retrieved_timestamp": "1762652579.897579",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/KRONOS-8B-V1-P3",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/KRONOS-8B-V1-P3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7137373280673058
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5127875870036823
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19259818731117825
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3615625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34050864361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V2/ff4c64ec-f44b-4bec-9534-bafa632a0e3f.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V2/ff4c64ec-f44b-4bec-9534-bafa632a0e3f.json
deleted file mode 100644
index 446292f96d5bed2d2e1f8262baf49a4429114306..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V2/ff4c64ec-f44b-4bec-9534-bafa632a0e3f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V2/1762652579.897814",
- "retrieved_timestamp": "1762652579.897815",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/KRONOS-8B-V2",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/KRONOS-8B-V2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5180243974875552
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.513268555595521
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22658610271903323
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38286458333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3737533244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V3/fc5613f1-09bc-4b82-89f4-4ee671cad5bf.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V3/fc5613f1-09bc-4b82-89f4-4ee671cad5bf.json
deleted file mode 100644
index 4aa1a5c6cd1c60d1bcea75d465b44603affd4af0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V3/fc5613f1-09bc-4b82-89f4-4ee671cad5bf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V3/1762652579.8980181",
- "retrieved_timestamp": "1762652579.898019",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/KRONOS-8B-V3",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/KRONOS-8B-V3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5474751437297483
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.511865544689898
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2598187311178248
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28859060402684567
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3922291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3738364361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V4/af8665b4-d9be-4243-9c8d-0b43e7abd540.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V4/af8665b4-d9be-4243-9c8d-0b43e7abd540.json
deleted file mode 100644
index 58b32f6ddeb75646f9d1f6049280ad469a24af4e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V4/af8665b4-d9be-4243-9c8d-0b43e7abd540.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V4/1762652579.898447",
- "retrieved_timestamp": "1762652579.898448",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/KRONOS-8B-V4",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/KRONOS-8B-V4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7889499860370484
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5092470034846742
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19486404833836857
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38295833333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37857380319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V5/290206b5-0e46-4f92-a2bd-f2c53ef3d147.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V5/290206b5-0e46-4f92-a2bd-f2c53ef3d147.json
deleted file mode 100644
index 81073668ef261d162c9c2911403647c2ad2922b6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V5/290206b5-0e46-4f92-a2bd-f2c53ef3d147.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V5/1762652579.8986921",
- "retrieved_timestamp": "1762652579.898693",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/KRONOS-8B-V5",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/KRONOS-8B-V5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5405058577906621
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5088651598969166
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2688821752265861
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40546875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37591422872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V6/78813c35-3eaa-4ae6-9099-bf79efb8b0df.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V6/78813c35-3eaa-4ae6-9099-bf79efb8b0df.json
deleted file mode 100644
index 78f19fc8088503f793e1c1606b16f9d465414d61..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V6/78813c35-3eaa-4ae6-9099-bf79efb8b0df.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V6/1762652579.898935",
- "retrieved_timestamp": "1762652579.898936",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/KRONOS-8B-V6",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/KRONOS-8B-V6"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7022467054083166
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5033606149499412
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2598187311178248
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41210416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3501496010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V7/1358fee5-3874-4997-b1f0-6e93c6c5e9c0.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V7/1358fee5-3874-4997-b1f0-6e93c6c5e9c0.json
deleted file mode 100644
index aa2d8d0aa6414a20bffa3e2403b7c4f0c96b964a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V7/1358fee5-3874-4997-b1f0-6e93c6c5e9c0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V7/1762652579.899169",
- "retrieved_timestamp": "1762652579.8991702",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/KRONOS-8B-V7",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/KRONOS-8B-V7"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3529102780622083
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4526219443939161
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11102719033232629
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36711458333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2696974734042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 4.015
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V8/57a4ddc6-0447-4840-94bc-5bb136025aab.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V8/57a4ddc6-0447-4840-94bc-5bb136025aab.json
deleted file mode 100644
index 76cf489b05515a37bb3a91642bbc821f327070be..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V8/57a4ddc6-0447-4840-94bc-5bb136025aab.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V8/1762652579.899387",
- "retrieved_timestamp": "1762652579.8993878",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/KRONOS-8B-V8",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/KRONOS-8B-V8"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7770349339751859
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5094406613555632
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20468277945619334
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3868958333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37824135638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V9/6fbb6156-196d-4523-900e-35316100d3b9.json b/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V9/6fbb6156-196d-4523-900e-35316100d3b9.json
deleted file mode 100644
index 7dd5a10bfae9fb24dd1938e4d4fc2e99b6d17cd1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_KRONOS-8B-V9/6fbb6156-196d-4523-900e-35316100d3b9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_KRONOS-8B-V9/1762652579.8996658",
- "retrieved_timestamp": "1762652579.899667",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/KRONOS-8B-V9",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/KRONOS-8B-V9"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7855778224001206
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5099211908307056
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1986404833836858
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3868020833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3751662234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_Llama-3.1-8B-Instruct-Zeus/38e620aa-c577-4b14-bebd-e98ebcbe48b2.json b/leaderboard_data/HFOpenLLMv2/T145/T145_Llama-3.1-8B-Instruct-Zeus/38e620aa-c577-4b14-bebd-e98ebcbe48b2.json
deleted file mode 100644
index 9f324330ad844ba0d9cdfc63c074ad72f71c877f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_Llama-3.1-8B-Instruct-Zeus/38e620aa-c577-4b14-bebd-e98ebcbe48b2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_Llama-3.1-8B-Instruct-Zeus/1762652579.899903",
- "retrieved_timestamp": "1762652579.899904",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/Llama-3.1-8B-Instruct-Zeus",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/Llama-3.1-8B-Instruct-Zeus"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7941207108250552
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5173982439996302
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19561933534743203
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39762499999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38929521276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_Meta-Llama-3.1-8B-Instruct-TIES/15b92d44-3d68-4c6a-bddd-5676ebda2e10.json b/leaderboard_data/HFOpenLLMv2/T145/T145_Meta-Llama-3.1-8B-Instruct-TIES/15b92d44-3d68-4c6a-bddd-5676ebda2e10.json
deleted file mode 100644
index ac8d9e822f6a88ef76492f5f776b566189d47fd4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_Meta-Llama-3.1-8B-Instruct-TIES/15b92d44-3d68-4c6a-bddd-5676ebda2e10.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_Meta-Llama-3.1-8B-Instruct-TIES/1762652579.900369",
- "retrieved_timestamp": "1762652579.900369",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/Meta-Llama-3.1-8B-Instruct-TIES",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/Meta-Llama-3.1-8B-Instruct-TIES"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5423542866261519
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5070111385564763
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20996978851963746
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3842916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37799202127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V10/464bae3d-bd06-4264-a939-59ab8e562ca6.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V10/464bae3d-bd06-4264-a939-59ab8e562ca6.json
deleted file mode 100644
index ce6cae1100c1a1dffcc4f50620b46225a0fb30f7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V10/464bae3d-bd06-4264-a939-59ab8e562ca6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V10/1762652579.900583",
- "retrieved_timestamp": "1762652579.900584",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V10",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V10"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7706651684197928
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5269758270442659
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21148036253776434
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38978124999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.390375664893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V11/a6eedf29-9ec8-4b03-a8f5-c9c4e2bda688.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V11/a6eedf29-9ec8-4b03-a8f5-c9c4e2bda688.json
deleted file mode 100644
index d416e9837eb5ceda8fd4be3505fe498f70e40f24..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V11/a6eedf29-9ec8-4b03-a8f5-c9c4e2bda688.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V11/1762652579.900793",
- "retrieved_timestamp": "1762652579.900793",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V11",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V11"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8099575792231279
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5161982586505715
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19637462235649547
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145973154362416
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38066666666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38838098404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V12/1ab70352-9bda-47c8-8bdf-90860934cfc7.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V12/1ab70352-9bda-47c8-8bdf-90860934cfc7.json
deleted file mode 100644
index e4c5395006a6c8f57042e9728e8d22c0cf16c176..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V12/1ab70352-9bda-47c8-8bdf-90860934cfc7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V12/1762652579.901004",
- "retrieved_timestamp": "1762652579.901004",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V12",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V12"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.781556270695089
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5253912026310238
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21148036253776434
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38584375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3912067819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V13-abliterated/7c39d06a-dafe-40a7-b5a1-dca14dcadff2.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V13-abliterated/7c39d06a-dafe-40a7-b5a1-dca14dcadff2.json
deleted file mode 100644
index 33c8066c7452450d8e697b8326ba1e94b32f4bc8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V13-abliterated/7c39d06a-dafe-40a7-b5a1-dca14dcadff2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V13-abliterated/1762652579.901429",
- "retrieved_timestamp": "1762652579.9014301",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V13-abliterated",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V13-abliterated"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7877509452696623
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5197597316957202
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17900302114803626
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3871458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38721742021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V13/10823e50-9478-4a8a-83cf-5169a0bc1f1f.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V13/10823e50-9478-4a8a-83cf-5169a0bc1f1f.json
deleted file mode 100644
index 524a1eccf3c326435b2d2546f2deac4e82da83a5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V13/10823e50-9478-4a8a-83cf-5169a0bc1f1f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V13/1762652579.90121",
- "retrieved_timestamp": "1762652579.9012108",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V13",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V13"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7904238531540756
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5277128851736589
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21374622356495468
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38447916666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39112367021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V14/2b0eb3f5-d35e-41ea-ba69-18c0b8a3e1e1.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V14/2b0eb3f5-d35e-41ea-ba69-18c0b8a3e1e1.json
deleted file mode 100644
index 030e9224dc44ea1921c719f0839b801191147b3a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V14/2b0eb3f5-d35e-41ea-ba69-18c0b8a3e1e1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V14/1762652579.901653",
- "retrieved_timestamp": "1762652579.901653",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V14",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V14"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.770939994769434
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5274593322517976
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21299093655589124
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3844479166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3913730053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V15/3e1be4f3-478f-4061-9856-f1beb0a749de.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V15/3e1be4f3-478f-4061-9856-f1beb0a749de.json
deleted file mode 100644
index 08d4edf5149e8bb30013ed6dd8edb26faae84bec..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V15/3e1be4f3-478f-4061-9856-f1beb0a749de.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V15/1762652579.901858",
- "retrieved_timestamp": "1762652579.901859",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V15",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V15"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.701272623306161
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5537552380544757
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23036253776435045
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40199999999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40591755319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 4.015
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V16/7beef3ca-6423-4a81-836d-0e4cdc4af973.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V16/7beef3ca-6423-4a81-836d-0e4cdc4af973.json
deleted file mode 100644
index e7b1e0840fc5b0d16705805f072b9bc3a290c8d0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V16/7beef3ca-6423-4a81-836d-0e4cdc4af973.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V16/1762652579.9020631",
- "retrieved_timestamp": "1762652579.902064",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V16",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V16"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7925471083392066
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5265817990313368
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22054380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3950833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39261968085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17-abliterated-V2/3344d19c-c79b-48b3-be5b-f5f27d6920ce.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17-abliterated-V2/3344d19c-c79b-48b3-be5b-f5f27d6920ce.json
deleted file mode 100644
index e9165158f6326d4bf8a3c8f7a0402ca4bbdbfd71..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17-abliterated-V2/3344d19c-c79b-48b3-be5b-f5f27d6920ce.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17-abliterated-V2/1762652579.902674",
- "retrieved_timestamp": "1762652579.902674",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V17-abliterated-V2",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V17-abliterated-V2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6532123654126606
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49280119619174295
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11178247734138973
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3407291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34017619680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17-abliterated-V4/bf9c0bfa-98e5-45b2-8819-0911af81d78f.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17-abliterated-V4/bf9c0bfa-98e5-45b2-8819-0911af81d78f.json
deleted file mode 100644
index 1d2a32fdd930a438f278bf062674908c101ed23b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17-abliterated-V4/bf9c0bfa-98e5-45b2-8819-0911af81d78f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17-abliterated-V4/1762652579.902891",
- "retrieved_timestamp": "1762652579.902891",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V17-abliterated-V4",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V17-abliterated-V4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7228298691915229
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5169216944225185
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09365558912386707
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4187083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37741023936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17-abliterated/35f89ab6-c6c9-41cd-9296-af4921490c3f.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17-abliterated/35f89ab6-c6c9-41cd-9296-af4921490c3f.json
deleted file mode 100644
index 670e8d3924c325242a637f50d413a804b98789a0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17-abliterated/35f89ab6-c6c9-41cd-9296-af4921490c3f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17-abliterated/1762652579.902467",
- "retrieved_timestamp": "1762652579.9024682",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V17-abliterated",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V17-abliterated"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7576009432749549
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.520041374505222
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04380664652567976
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42692708333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36220079787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 7.594
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17/0368a3ba-e461-45d1-a037-3b9160a8efbb.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17/0368a3ba-e461-45d1-a037-3b9160a8efbb.json
deleted file mode 100644
index 09a95a6d3225888fdb0382ae804ca10e30c729f1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V17/0368a3ba-e461-45d1-a037-3b9160a8efbb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V17/1762652579.902262",
- "retrieved_timestamp": "1762652579.902263",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V17",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V17"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7940708431406447
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.525086643033107
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2243202416918429
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3221476510067114
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40162499999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39345079787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V18/e5d250e7-8d0a-48b5-aaad-3d1da02eab00.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V18/e5d250e7-8d0a-48b5-aaad-3d1da02eab00.json
deleted file mode 100644
index e90f76e73bba087be80034c57202eff861fbf70e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V18/e5d250e7-8d0a-48b5-aaad-3d1da02eab00.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V18/1762652579.903114",
- "retrieved_timestamp": "1762652579.903115",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V18",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V18"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7834046995305788
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5269802862530547
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21827794561933533
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40429166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39419880319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V19/0392cccb-0a1c-486e-876a-1404f14a1080.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V19/0392cccb-0a1c-486e-876a-1404f14a1080.json
deleted file mode 100644
index b2d34b30a6eb11d4986fb6b9f53131cf9ef04c7a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V19/0392cccb-0a1c-486e-876a-1404f14a1080.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V19/1762652579.903361",
- "retrieved_timestamp": "1762652579.903362",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V19",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V19"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7882507302845339
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5276233222408697
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22054380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3221476510067114
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40429166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3933676861702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2-ORPO/588b0fce-37cd-41f1-8eaa-50383cdc0f00.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2-ORPO/588b0fce-37cd-41f1-8eaa-50383cdc0f00.json
deleted file mode 100644
index fe74092e2d285bae9102d5939ab58b19d095a34b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2-ORPO/588b0fce-37cd-41f1-8eaa-50383cdc0f00.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2-ORPO/1762652579.903775",
- "retrieved_timestamp": "1762652579.903776",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V2-ORPO",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V2-ORPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7186830941900824
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5075246906772
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18277945619335348
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39349999999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3677692819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 4.015
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2-abliterated/926fb6ed-0750-4d04-8e3c-da470e236db2.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2-abliterated/926fb6ed-0750-4d04-8e3c-da470e236db2.json
deleted file mode 100644
index 6a709041def1dfd0aa3ca6341b14c60d74c9e950..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2-abliterated/926fb6ed-0750-4d04-8e3c-da470e236db2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2-abliterated/1762652579.9039848",
- "retrieved_timestamp": "1762652579.903986",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V2-abliterated",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V2-abliterated"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7895495064207414
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5128868622210663
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21148036253776434
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3910833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38248005319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2/e64503c5-d9ce-4544-8caf-0fec97a2b592.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2/e64503c5-d9ce-4544-8caf-0fec97a2b592.json
deleted file mode 100644
index 8bb5d48747fbf138753245d96c57e1d69006d670..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2/e64503c5-d9ce-4544-8caf-0fec97a2b592.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2/1762652579.9035678",
- "retrieved_timestamp": "1762652579.903569",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V2",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8029384255996312
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5194405455747161
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21601208459214502
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3910208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3896276595744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V20/0ba8bca5-3a61-499a-8e2d-ca84f52ef654.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V20/0ba8bca5-3a61-499a-8e2d-ca84f52ef654.json
deleted file mode 100644
index 7e5f033d32324ad1c35eacc7ae6de826e0c9f63e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V20/0ba8bca5-3a61-499a-8e2d-ca84f52ef654.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V20/1762652579.904202",
- "retrieved_timestamp": "1762652579.904203",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V20",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V20"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7955945779420825
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5244005058415827
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2190332326283988
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32298657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40432291666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3929521276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V21/380a44ec-387a-4f34-92c2-18fc7a8d5ce0.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V21/380a44ec-387a-4f34-92c2-18fc7a8d5ce0.json
deleted file mode 100644
index ea4c316eaf08328221985fddb83d3d8077b0755c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V21/380a44ec-387a-4f34-92c2-18fc7a8d5ce0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V21/1762652579.904516",
- "retrieved_timestamp": "1762652579.904516",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V21",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V21"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3785145635801894
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33975753940458464
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1593655589123867
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32615625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17137632978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V22/3f44a1c0-b70a-4712-a0c1-bdf3318b270c.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V22/3f44a1c0-b70a-4712-a0c1-bdf3318b270c.json
deleted file mode 100644
index 665c3135dcb724763e107d50e53ff7260d8d7d30..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V22/3f44a1c0-b70a-4712-a0c1-bdf3318b270c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V22/1762652579.9047282",
- "retrieved_timestamp": "1762652579.9047291",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V22",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V22"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7995163942782927
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5244915522507715
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22280966767371602
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3989583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3937832446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V23/f83b7584-0e52-4658-ae15-f295064b9111.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V23/f83b7584-0e52-4658-ae15-f295064b9111.json
deleted file mode 100644
index 568605e49c918c9c27ca10f55e1f8e2c03a9a867..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V23/f83b7584-0e52-4658-ae15-f295064b9111.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V23/1762652579.904932",
- "retrieved_timestamp": "1762652579.9049332",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V23",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V23"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7621222799948582
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.519500470668349
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18202416918429004
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3921979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3666057180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V24/51368b21-1b48-4c07-9b09-8cae0786200b.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V24/51368b21-1b48-4c07-9b09-8cae0786200b.json
deleted file mode 100644
index 7e3d0d6adfb55379312c12e96e9408104c6aabe4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V24/51368b21-1b48-4c07-9b09-8cae0786200b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V24/1762652579.905136",
- "retrieved_timestamp": "1762652579.9051368",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V24",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V24"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5999813827311533
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4777962576721959
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14577039274924472
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3729166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32845744680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V25/52b41117-c308-4e8c-9c61-ce8e4faf778f.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V25/52b41117-c308-4e8c-9c61-ce8e4faf778f.json
deleted file mode 100644
index 9e0cbcdb6cc5b2f5e901666ebfdd33f75c0c148b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V25/52b41117-c308-4e8c-9c61-ce8e4faf778f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V25/1762652579.905337",
- "retrieved_timestamp": "1762652579.905338",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V25",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V25"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33202790817253774
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4546907005207668
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2039274924471299
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3488229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2884807180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V26/8ae81cea-b179-4025-916a-9bc73755de82.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V26/8ae81cea-b179-4025-916a-9bc73755de82.json
deleted file mode 100644
index 45209944275a57403dd6db85a6cd323e581b48e9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V26/8ae81cea-b179-4025-916a-9bc73755de82.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V26/1762652579.905539",
- "retrieved_timestamp": "1762652579.905539",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V26",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V26"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6707979272774018
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5231548583920674
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12462235649546828
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40162499999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39070811170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V27/bf31323b-bfb5-464a-b343-0605dafb5a60.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V27/bf31323b-bfb5-464a-b343-0605dafb5a60.json
deleted file mode 100644
index a45533efd035d71fb193158850cea9935b88bd63..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V27/bf31323b-bfb5-464a-b343-0605dafb5a60.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V27/1762652579.9057322",
- "retrieved_timestamp": "1762652579.905733",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V27",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V27"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.654361538495636
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.52303129292911
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13444108761329304
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39768749999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3902094414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V28/e31561ff-779a-4ebe-b6fe-686b2895c53b.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V28/e31561ff-779a-4ebe-b6fe-686b2895c53b.json
deleted file mode 100644
index da0056098b66ebc90ec3e3a43474bd753c57b7a9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V28/e31561ff-779a-4ebe-b6fe-686b2895c53b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V28/1762652579.905931",
- "retrieved_timestamp": "1762652579.905931",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V28",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V28"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.635252241829457
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5254256199968339
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1268882175226586
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38962499999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3902094414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V29/c383684a-2f70-46e9-ab55-4d68903613b3.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V29/c383684a-2f70-46e9-ab55-4d68903613b3.json
deleted file mode 100644
index 485a9b6126d0aef31f727a6988d0e675504c4c3f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V29/c383684a-2f70-46e9-ab55-4d68903613b3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V29/1762652579.906123",
- "retrieved_timestamp": "1762652579.906123",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V29",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V29"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7417640748768822
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5253330901112457
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16012084592145015
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3263422818791946
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4002604166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3920378989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2L1/015f91ef-9318-44d6-acb2-17628000c273.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2L1/015f91ef-9318-44d6-acb2-17628000c273.json
deleted file mode 100644
index 1d938f1e6073c3de0c79bc10c47363f23f13fd69..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2L1/015f91ef-9318-44d6-acb2-17628000c273.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2L1/1762652579.906316",
- "retrieved_timestamp": "1762652579.906317",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V2L1",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V2L1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3191886416929303
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5013485375260267
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12386706948640483
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38819791666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36377992021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2L2/8e7be46e-af57-4e88-9df5-3161110dfa66.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2L2/8e7be46e-af57-4e88-9df5-3161110dfa66.json
deleted file mode 100644
index 9667f8cb5d192574cb8287e57566c90e95ca8ec8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V2L2/8e7be46e-af57-4e88-9df5-3161110dfa66.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V2L2/1762652579.9065118",
- "retrieved_timestamp": "1762652579.906513",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V2L2",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V2L2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8020640788662969
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5202843665402132
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20166163141993956
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39746875000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38838098404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V3/6b8fca40-f44b-45a0-bd5b-04b2fa2067a2.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V3/6b8fca40-f44b-45a0-bd5b-04b2fa2067a2.json
deleted file mode 100644
index 51791f21c5e9ae76d848946c6283a5f63647aa29..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V3/6b8fca40-f44b-45a0-bd5b-04b2fa2067a2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V3/1762652579.906709",
- "retrieved_timestamp": "1762652579.90671",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V3",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7886751596874072
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5265064133535374
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16767371601208458
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3221476510067114
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4016875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38040226063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V30/839ff423-8c5c-4fab-aecf-b535ee06af36.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V30/839ff423-8c5c-4fab-aecf-b535ee06af36.json
deleted file mode 100644
index a7157686fc9123efb990539f84c2acfe1eb6a23a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V30/839ff423-8c5c-4fab-aecf-b535ee06af36.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V30/1762652579.907134",
- "retrieved_timestamp": "1762652579.907138",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V30",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V30"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7435626360279614
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5243248855841048
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15861027190332327
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4029270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3943650265957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V4/9330c290-ee47-4a7d-9b8f-62903402e0e3.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V4/9330c290-ee47-4a7d-9b8f-62903402e0e3.json
deleted file mode 100644
index fe4ad996b4c86b7f10f13da4c7d46d9c1c856e54..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V4/9330c290-ee47-4a7d-9b8f-62903402e0e3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V4/1762652579.9075332",
- "retrieved_timestamp": "1762652579.907535",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V4",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7807317916461656
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5245974297200655
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19259818731117825
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4028958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37882313829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V6/09670c05-9463-479f-89e3-5029fd5d7ee7.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V6/09670c05-9463-479f-89e3-5029fd5d7ee7.json
deleted file mode 100644
index 64a38cbdcc57b26c6d44eb590289afb5e45a66be..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V6/09670c05-9463-479f-89e3-5029fd5d7ee7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V6/1762652579.9077919",
- "retrieved_timestamp": "1762652579.9077928",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V6",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V6"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7837792612490415
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5239561762634447
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20241691842900303
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4068020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37591422872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V7/c6a9173a-bacc-40bd-9572-239f9901e065.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V7/c6a9173a-bacc-40bd-9572-239f9901e065.json
deleted file mode 100644
index bdefea2a3ee0a00891726faca546046848d93fcd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V7/c6a9173a-bacc-40bd-9572-239f9901e065.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V7/1762652579.908076",
- "retrieved_timestamp": "1762652579.908077",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V7",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V7"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7786085364610345
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5070394117180643
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14803625377643503
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41616666666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3812333776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V8/c0035841-a312-493e-9c44-a75133e894d1.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V8/c0035841-a312-493e-9c44-a75133e894d1.json
deleted file mode 100644
index 64a5ee0870ae37e20ed283ad0f2027e66704997e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V8/c0035841-a312-493e-9c44-a75133e894d1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V8/1762652579.908298",
- "retrieved_timestamp": "1762652579.908299",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V8",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V8"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7913979352562313
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5064510419864701
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13293051359516617
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.421375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37608045212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V9/f5876dc1-b769-431f-84fe-365d2457902e.json b/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V9/f5876dc1-b769-431f-84fe-365d2457902e.json
deleted file mode 100644
index d3c8d112e9cc32fb75d0984c43ede71e8083dd6d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/T145/T145_ZEUS-8B-V9/f5876dc1-b769-431f-84fe-365d2457902e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_ZEUS-8B-V9/1762652579.908509",
- "retrieved_timestamp": "1762652579.90851",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/ZEUS-8B-V9",
- "developer": "T145",
- "inference_platform": "unknown",
- "id": "T145/ZEUS-8B-V9"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5551436854213487
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5207256346477752
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21374622356495468
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3949270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39012632978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat-1m-hf/077f7956-8c9b-47ef-8c4d-40455bbb0027.json b/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat-1m-hf/077f7956-8c9b-47ef-8c4d-40455bbb0027.json
deleted file mode 100644
index 7c926c9d31b409d8f9c4cd9b1d7f5e9d0e68e473..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat-1m-hf/077f7956-8c9b-47ef-8c4d-40455bbb0027.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat-1m-hf/1762652579.9096901",
- "retrieved_timestamp": "1762652579.9096909",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "THUDM/glm-4-9b-chat-1m-hf",
- "developer": "THUDM",
- "inference_platform": "unknown",
- "id": "THUDM/glm-4-9b-chat-1m-hf"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5341106043076814
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3900953106836365
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04833836858006042
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36888541666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18143284574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GlmForCausalLM",
- "params_billions": 9.484
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat-1m/f0c306f0-683e-4582-81b7-f0a2c372060f.json b/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat-1m/f0c306f0-683e-4582-81b7-f0a2c372060f.json
deleted file mode 100644
index d5f1bcecce65d6de75520f65800363974accd6ec..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat-1m/f0c306f0-683e-4582-81b7-f0a2c372060f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat-1m/1762652579.909478",
- "retrieved_timestamp": "1762652579.909479",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "THUDM/glm-4-9b-chat-1m",
- "developer": "THUDM",
- "inference_platform": "unknown",
- "id": "THUDM/glm-4-9b-chat-1m"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41800578218330303
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3794583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31632313829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "ChatGLMModel",
- "params_billions": 9.484
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat-hf/0af9353e-10d5-42e3-8bc9-4c736720ff30.json b/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat-hf/0af9353e-10d5-42e3-8bc9-4c736720ff30.json
deleted file mode 100644
index 6848dd5fddc1ba2903a9bf4329e8444074cffc0e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat-hf/0af9353e-10d5-42e3-8bc9-4c736720ff30.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat-hf/1762652579.909895",
- "retrieved_timestamp": "1762652579.909896",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "THUDM/glm-4-9b-chat-hf",
- "developer": "THUDM",
- "inference_platform": "unknown",
- "id": "THUDM/glm-4-9b-chat-hf"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6513140688927601
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4432308604245425
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08459214501510574
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35930208333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27742686170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GlmForCausalLM",
- "params_billions": 9.4
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat/e7c5d8ef-d480-4ab9-b698-409e5ea76cf8.json b/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat/e7c5d8ef-d480-4ab9-b698-409e5ea76cf8.json
deleted file mode 100644
index 6c58e016b77f523428769933412c0f7b86397577..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b-chat/e7c5d8ef-d480-4ab9-b698-409e5ea76cf8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b-chat/1762652579.909267",
- "retrieved_timestamp": "1762652579.909267",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "THUDM/glm-4-9b-chat",
- "developer": "THUDM",
- "inference_platform": "unknown",
- "id": "THUDM/glm-4-9b-chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47363884291035735
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3994270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.316655585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "ChatGLMModelM",
- "params_billions": 9.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b/bd038a6c-1241-401d-962d-e033434ba735.json b/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b/bd038a6c-1241-401d-962d-e033434ba735.json
deleted file mode 100644
index 787462be2eccdec56f6db35d13646d6c40d80094..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/THUDM/THUDM_glm-4-9b/bd038a6c-1241-401d-962d-e033434ba735.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/THUDM_glm-4-9b/1762652579.9090161",
- "retrieved_timestamp": "1762652579.9090161",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "THUDM/glm-4-9b",
- "developer": "THUDM",
- "inference_platform": "unknown",
- "id": "THUDM/glm-4-9b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1426082793654171
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5528368141665274
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4385833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4144780585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "ChatGLMModelM",
- "params_billions": 9.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/TIGER-Lab/TIGER-Lab_AceCodeRM-7B/eb1d6ce5-3b0c-477d-9ca6-2f3ff8bc4e30.json b/leaderboard_data/HFOpenLLMv2/TIGER-Lab/TIGER-Lab_AceCodeRM-7B/eb1d6ce5-3b0c-477d-9ca6-2f3ff8bc4e30.json
deleted file mode 100644
index cfa3bb51544c6543e5addf90db1ff38ea9b34976..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/TIGER-Lab/TIGER-Lab_AceCodeRM-7B/eb1d6ce5-3b0c-477d-9ca6-2f3ff8bc4e30.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCodeRM-7B/1762652579.9101062",
- "retrieved_timestamp": "1762652579.910107",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TIGER-Lab/AceCodeRM-7B",
- "developer": "TIGER-Lab",
- "inference_platform": "unknown",
- "id": "TIGER-Lab/AceCodeRM-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5854931581536988
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4773230085351336
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3466767371601209
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41920833333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3361037234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalRM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/TIGER-Lab/TIGER-Lab_MAmmoTH2-7B-Plus/93503cc0-80aa-44b5-9155-c81cd44a9ac9.json b/leaderboard_data/HFOpenLLMv2/TIGER-Lab/TIGER-Lab_MAmmoTH2-7B-Plus/93503cc0-80aa-44b5-9155-c81cd44a9ac9.json
deleted file mode 100644
index 4bb4ad4adb9c2a3e50bef1d991843d28e4aeef4d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/TIGER-Lab/TIGER-Lab_MAmmoTH2-7B-Plus/93503cc0-80aa-44b5-9155-c81cd44a9ac9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TIGER-Lab_MAmmoTH2-7B-Plus/1762652579.9110248",
- "retrieved_timestamp": "1762652579.911026",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TIGER-Lab/MAmmoTH2-7B-Plus",
- "developer": "TIGER-Lab",
- "inference_platform": "unknown",
- "id": "TIGER-Lab/MAmmoTH2-7B-Plus"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5574664113441224
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42346949888019064
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18580060422960726
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41235416666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30169547872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/TeeZee/TeeZee_DoubleBagel-57B-v1.0/1315f2ad-2e39-4cab-b09a-c74d0779f895.json b/leaderboard_data/HFOpenLLMv2/TeeZee/TeeZee_DoubleBagel-57B-v1.0/1315f2ad-2e39-4cab-b09a-c74d0779f895.json
deleted file mode 100644
index 676d381bb1f34517ee96ffc7b73f29f1925a0f6a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/TeeZee/TeeZee_DoubleBagel-57B-v1.0/1315f2ad-2e39-4cab-b09a-c74d0779f895.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TeeZee_DoubleBagel-57B-v1.0/1762652579.9121659",
- "retrieved_timestamp": "1762652579.9121659",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TeeZee/DoubleBagel-57B-v1.0",
- "developer": "TeeZee",
- "inference_platform": "unknown",
- "id": "TeeZee/DoubleBagel-57B-v1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23363342597640924
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.325078559362514
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.009818731117824773
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43148958333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14777260638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 56.703
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Telugu-LLM-Labs/Telugu-LLM-Labs_Indic-gemma-2b-finetuned-sft-Navarasa-2.0/ec8a8e25-f985-40a8-80ff-0c7d7595029d.json b/leaderboard_data/HFOpenLLMv2/Telugu-LLM-Labs/Telugu-LLM-Labs_Indic-gemma-2b-finetuned-sft-Navarasa-2.0/ec8a8e25-f985-40a8-80ff-0c7d7595029d.json
deleted file mode 100644
index c073d9459410b753b42c473e3469cc08dbc0d446..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Telugu-LLM-Labs/Telugu-LLM-Labs_Indic-gemma-2b-finetuned-sft-Navarasa-2.0/ec8a8e25-f985-40a8-80ff-0c7d7595029d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Telugu-LLM-Labs_Indic-gemma-2b-finetuned-sft-Navarasa-2.0/1762652579.912417",
- "retrieved_timestamp": "1762652579.912417",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0",
- "developer": "Telugu-LLM-Labs",
- "inference_platform": "unknown",
- "id": "Telugu-LLM-Labs/Indic-gemma-2b-finetuned-sft-Navarasa-2.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21030310686755588
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3240881373468133
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.027190332326283987
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24328859060402686
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3899375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12790890957446807
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 2.506
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Telugu-LLM-Labs/Telugu-LLM-Labs_Indic-gemma-7b-finetuned-sft-Navarasa-2.0/89d117f3-7a67-4e30-82b2-b42efaf44024.json b/leaderboard_data/HFOpenLLMv2/Telugu-LLM-Labs/Telugu-LLM-Labs_Indic-gemma-7b-finetuned-sft-Navarasa-2.0/89d117f3-7a67-4e30-82b2-b42efaf44024.json
deleted file mode 100644
index c44ef73786c8b5a264c86c32f9a18f64f7373925..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Telugu-LLM-Labs/Telugu-LLM-Labs_Indic-gemma-7b-finetuned-sft-Navarasa-2.0/89d117f3-7a67-4e30-82b2-b42efaf44024.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Telugu-LLM-Labs_Indic-gemma-7b-finetuned-sft-Navarasa-2.0/1762652579.912673",
- "retrieved_timestamp": "1762652579.912673",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0",
- "developer": "Telugu-LLM-Labs",
- "inference_platform": "unknown",
- "id": "Telugu-LLM-Labs/Indic-gemma-7b-finetuned-sft-Navarasa-2.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32368449048524583
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40229948924733394
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0256797583081571
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40832291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23503989361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 8.538
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/TencentARC/TencentARC_LLaMA-Pro-8B-Instruct/98ea850e-7019-4728-a558-8b1819ec47c2.json b/leaderboard_data/HFOpenLLMv2/TencentARC/TencentARC_LLaMA-Pro-8B-Instruct/98ea850e-7019-4728-a558-8b1819ec47c2.json
deleted file mode 100644
index 88ca70a09f005b7a0653fef60c474f355133e3ad..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/TencentARC/TencentARC_LLaMA-Pro-8B-Instruct/98ea850e-7019-4728-a558-8b1819ec47c2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TencentARC_LLaMA-Pro-8B-Instruct/1762652579.9131231",
- "retrieved_timestamp": "1762652579.913124",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TencentARC/LLaMA-Pro-8B-Instruct",
- "developer": "TencentARC",
- "inference_platform": "unknown",
- "id": "TencentARC/LLaMA-Pro-8B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4486063644463357
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4224205282459997
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.024924471299093656
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41902083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19456449468085107
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.357
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/TheDrummer/TheDrummer_Cydonia-22B-v1.2/4a3e8df4-8e21-4c7c-aec8-afe353831c3d.json b/leaderboard_data/HFOpenLLMv2/TheDrummer/TheDrummer_Cydonia-22B-v1.2/4a3e8df4-8e21-4c7c-aec8-afe353831c3d.json
deleted file mode 100644
index 315c7016fee87f5b5e1dc28eb9fc6e4db83581b3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/TheDrummer/TheDrummer_Cydonia-22B-v1.2/4a3e8df4-8e21-4c7c-aec8-afe353831c3d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheDrummer_Cydonia-22B-v1.2/1762652579.9138188",
- "retrieved_timestamp": "1762652579.9138198",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheDrummer/Cydonia-22B-v1.2",
- "developer": "TheDrummer",
- "inference_platform": "unknown",
- "id": "TheDrummer/Cydonia-22B-v1.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5635114828654637
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.580856074392761
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20317220543806647
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40217708333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4140625
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 22.247
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/TheDrummer/TheDrummer_Ministrations-8B-v1/21d5973e-d827-4bd6-b050-346da350a0aa.json b/leaderboard_data/HFOpenLLMv2/TheDrummer/TheDrummer_Ministrations-8B-v1/21d5973e-d827-4bd6-b050-346da350a0aa.json
deleted file mode 100644
index 6de98210c9ac79183ecc4db810e6603af9c9b4eb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/TheDrummer/TheDrummer_Ministrations-8B-v1/21d5973e-d827-4bd6-b050-346da350a0aa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheDrummer_Ministrations-8B-v1/1762652579.9148722",
- "retrieved_timestamp": "1762652579.9148731",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheDrummer/Ministrations-8B-v1",
- "developer": "TheDrummer",
- "inference_platform": "unknown",
- "id": "TheDrummer/Ministrations-8B-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28219346888478125
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48766312602251366
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18429003021148035
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44490625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36436170212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 8.02
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/TheDrummer/TheDrummer_Rocinante-12B-v1/f21e98c1-5535-4cb4-a9f0-541e49aff795.json b/leaderboard_data/HFOpenLLMv2/TheDrummer/TheDrummer_Rocinante-12B-v1/f21e98c1-5535-4cb4-a9f0-541e49aff795.json
deleted file mode 100644
index 2da455ecddaabe46e6a419f591218f11c230ec26..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/TheDrummer/TheDrummer_Rocinante-12B-v1/f21e98c1-5535-4cb4-a9f0-541e49aff795.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheDrummer_Rocinante-12B-v1/1762652579.915099",
- "retrieved_timestamp": "1762652579.9150999",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheDrummer/Rocinante-12B-v1",
- "developer": "TheDrummer",
- "inference_platform": "unknown",
- "id": "TheDrummer/Rocinante-12B-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6076499244227538
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5065452085797449
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1268882175226586
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40171874999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34773936170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/TheDrunkenSnail/TheDrunkenSnail_Daughter-of-Rhodia-12B/0f1c48a7-2a20-40c8-88e8-bdfdc3cdad40.json b/leaderboard_data/HFOpenLLMv2/TheDrunkenSnail/TheDrunkenSnail_Daughter-of-Rhodia-12B/0f1c48a7-2a20-40c8-88e8-bdfdc3cdad40.json
deleted file mode 100644
index adb02b5d5dd7f5d113acf7e3e1ca1973ba600d61..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/TheDrunkenSnail/TheDrunkenSnail_Daughter-of-Rhodia-12B/0f1c48a7-2a20-40c8-88e8-bdfdc3cdad40.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheDrunkenSnail_Daughter-of-Rhodia-12B/1762652579.91594",
- "retrieved_timestamp": "1762652579.9159412",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheDrunkenSnail/Daughter-of-Rhodia-12B",
- "developer": "TheDrunkenSnail",
- "inference_platform": "unknown",
- "id": "TheDrunkenSnail/Daughter-of-Rhodia-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6903815210308648
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5179174184876773
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12235649546827794
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31711409395973156
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43477083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3641123670212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/TheDrunkenSnail/TheDrunkenSnail_Mother-of-Rhodia-12B/2178eb24-2558-44db-aff1-7903c2e0f657.json b/leaderboard_data/HFOpenLLMv2/TheDrunkenSnail/TheDrunkenSnail_Mother-of-Rhodia-12B/2178eb24-2558-44db-aff1-7903c2e0f657.json
deleted file mode 100644
index 5b0e8d22ee2e12ce6533a805ffa77a871eae8e0a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/TheDrunkenSnail/TheDrunkenSnail_Mother-of-Rhodia-12B/2178eb24-2558-44db-aff1-7903c2e0f657.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheDrunkenSnail_Mother-of-Rhodia-12B/1762652579.9161909",
- "retrieved_timestamp": "1762652579.9161909",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheDrunkenSnail/Mother-of-Rhodia-12B",
- "developer": "TheDrunkenSnail",
- "inference_platform": "unknown",
- "id": "TheDrunkenSnail/Mother-of-Rhodia-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6504895898438365
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49479138664574934
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12235649546827794
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41241666666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35513630319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/TheDrunkenSnail/TheDrunkenSnail_Son-of-Rhodia/22c87268-7e49-42b4-9bbb-16a4b305c595.json b/leaderboard_data/HFOpenLLMv2/TheDrunkenSnail/TheDrunkenSnail_Son-of-Rhodia/22c87268-7e49-42b4-9bbb-16a4b305c595.json
deleted file mode 100644
index dc8c8c2a15412781b809cb0330ff6f9759f25c81..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/TheDrunkenSnail/TheDrunkenSnail_Son-of-Rhodia/22c87268-7e49-42b4-9bbb-16a4b305c595.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheDrunkenSnail_Son-of-Rhodia/1762652579.916397",
- "retrieved_timestamp": "1762652579.916397",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheDrunkenSnail/Son-of-Rhodia",
- "developer": "TheDrunkenSnail",
- "inference_platform": "unknown",
- "id": "TheDrunkenSnail/Son-of-Rhodia"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7046447869430887
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5097327647725524
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13141993957703926
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4202916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3607878989361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/TheHierophant/TheHierophant_Underground-Cognitive-V0.3-test/872cc338-765c-4291-8b50-77b4bce719fd.json b/leaderboard_data/HFOpenLLMv2/TheHierophant/TheHierophant_Underground-Cognitive-V0.3-test/872cc338-765c-4291-8b50-77b4bce719fd.json
deleted file mode 100644
index e25147ffaf19963e4506dce1d8ec28f1dce33a9a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/TheHierophant/TheHierophant_Underground-Cognitive-V0.3-test/872cc338-765c-4291-8b50-77b4bce719fd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheHierophant_Underground-Cognitive-V0.3-test/1762652579.916598",
- "retrieved_timestamp": "1762652579.916598",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheHierophant/Underground-Cognitive-V0.3-test",
- "developer": "TheHierophant",
- "inference_platform": "unknown",
- "id": "TheHierophant/Underground-Cognitive-V0.3-test"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4808297539417634
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5290131900998047
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43511458333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.331781914893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.732
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/TheTsar1209/TheTsar1209_nemo-carpmuscle-v0.1/8e834483-df6f-4d58-8257-f0cd1d8e3aa1.json b/leaderboard_data/HFOpenLLMv2/TheTsar1209/TheTsar1209_nemo-carpmuscle-v0.1/8e834483-df6f-4d58-8257-f0cd1d8e3aa1.json
deleted file mode 100644
index 681692c2e177c5dfbee6184aaa896332dcacf0dd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/TheTsar1209/TheTsar1209_nemo-carpmuscle-v0.1/8e834483-df6f-4d58-8257-f0cd1d8e3aa1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheTsar1209_nemo-carpmuscle-v0.1/1762652579.9168499",
- "retrieved_timestamp": "1762652579.916851",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheTsar1209/nemo-carpmuscle-v0.1",
- "developer": "TheTsar1209",
- "inference_platform": "unknown",
- "id": "TheTsar1209/nemo-carpmuscle-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2275639746982451
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5083529697101391
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04758308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4135
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3405917553191489
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Tijmen2/Tijmen2_cosmosage-v3/f1eed2d5-89ca-4757-a5f9-9a90e811f075.json b/leaderboard_data/HFOpenLLMv2/Tijmen2/Tijmen2_cosmosage-v3/f1eed2d5-89ca-4757-a5f9-9a90e811f075.json
deleted file mode 100644
index efccccc2766d126cd5375577b59b118e66758d47..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Tijmen2/Tijmen2_cosmosage-v3/f1eed2d5-89ca-4757-a5f9-9a90e811f075.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Tijmen2_cosmosage-v3/1762652579.918411",
- "retrieved_timestamp": "1762652579.918412",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Tijmen2/cosmosage-v3",
- "developer": "Tijmen2",
- "inference_platform": "unknown",
- "id": "Tijmen2/cosmosage-v3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44823180272787316
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4550637900339029
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05060422960725076
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4198854166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24858710106382978
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v0.1/818cb0a4-7458-4cee-aca8-7cc72db341f8.json b/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v0.1/818cb0a4-7458-4cee-aca8-7cc72db341f8.json
deleted file mode 100644
index 4acc4eebc6f3d909a5f49346c3ac885a6a2ab0bd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v0.1/818cb0a4-7458-4cee-aca8-7cc72db341f8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v0.1/1762652579.918663",
- "retrieved_timestamp": "1762652579.918664",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TinyLlama/TinyLlama-1.1B-Chat-v0.1",
- "developer": "TinyLlama",
- "inference_platform": "unknown",
- "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1478543597654224
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30835294748680114
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.006042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22902684563758388
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35923958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10979055851063829
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.1
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v0.5/96454d40-4535-4439-87be-0ea7b55cd88a.json b/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v0.5/96454d40-4535-4439-87be-0ea7b55cd88a.json
deleted file mode 100644
index 4c4ebe171cfc418f511eeb8e54ed2617863d64f7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v0.5/96454d40-4535-4439-87be-0ea7b55cd88a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v0.5/1762652579.918914",
- "retrieved_timestamp": "1762652579.918914",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TinyLlama/TinyLlama-1.1B-Chat-v0.5",
- "developer": "TinyLlama",
- "inference_platform": "unknown",
- "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1633665341294432
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3105046915935697
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0037764350453172208
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2483221476510067
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36612500000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10962433510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.1
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v0.6/be032e7e-39b5-4153-81b9-c29115b231b4.json b/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v0.6/be032e7e-39b5-4153-81b9-c29115b231b4.json
deleted file mode 100644
index 64b9a7ceb9f60def5eb15bd5158434f2d3f1fe02..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v0.6/be032e7e-39b5-4153-81b9-c29115b231b4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v0.6/1762652579.919127",
- "retrieved_timestamp": "1762652579.919127",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TinyLlama/TinyLlama-1.1B-Chat-v0.6",
- "developer": "TinyLlama",
- "inference_platform": "unknown",
- "id": "TinyLlama/TinyLlama-1.1B-Chat-v0.6"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15742119797692344
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3066976656166826
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015861027190332326
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34221875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11486037234042554
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.1
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v1.0/0a24d7b1-44eb-4f5b-ae2f-ddee372facd5.json b/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v1.0/0a24d7b1-44eb-4f5b-ae2f-ddee372facd5.json
deleted file mode 100644
index a09d007f0a919dfc793b51a731384bcf94933450..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/TinyLlama/TinyLlama_TinyLlama-1.1B-Chat-v1.0/0a24d7b1-44eb-4f5b-ae2f-ddee372facd5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TinyLlama_TinyLlama-1.1B-Chat-v1.0/1762652579.9193401",
- "retrieved_timestamp": "1762652579.919341",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
- "developer": "TinyLlama",
- "inference_platform": "unknown",
- "id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0595763684800773
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3103562867491015
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015105740181268883
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35152083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11012300531914894
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.1
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ToastyPigeon/ToastyPigeon_Sto-vo-kor-12B/1c795b39-a382-4315-8b6b-626423b9ccfe.json b/leaderboard_data/HFOpenLLMv2/ToastyPigeon/ToastyPigeon_Sto-vo-kor-12B/1c795b39-a382-4315-8b6b-626423b9ccfe.json
deleted file mode 100644
index f43508f3697c8a302fc4ebe8ad6a23b24dcc93ee..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ToastyPigeon/ToastyPigeon_Sto-vo-kor-12B/1c795b39-a382-4315-8b6b-626423b9ccfe.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ToastyPigeon_Sto-vo-kor-12B/1762652579.920128",
- "retrieved_timestamp": "1762652579.920129",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ToastyPigeon/Sto-vo-kor-12B",
- "developer": "ToastyPigeon",
- "inference_platform": "unknown",
- "id": "ToastyPigeon/Sto-vo-kor-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5501225636865739
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5064617128925814
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10876132930513595
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39384375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33976063829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Trappu/Trappu_Magnum-Picaro-0.7-v2-12b/77871404-f2e3-46f9-8c48-808fb89442cc.json b/leaderboard_data/HFOpenLLMv2/Trappu/Trappu_Magnum-Picaro-0.7-v2-12b/77871404-f2e3-46f9-8c48-808fb89442cc.json
deleted file mode 100644
index 922ffdcaaa6f62318a5c6bb327849e86e1453d4f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Trappu/Trappu_Magnum-Picaro-0.7-v2-12b/77871404-f2e3-46f9-8c48-808fb89442cc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Trappu_Magnum-Picaro-0.7-v2-12b/1762652579.920383",
- "retrieved_timestamp": "1762652579.920383",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Trappu/Magnum-Picaro-0.7-v2-12b",
- "developer": "Trappu",
- "inference_platform": "unknown",
- "id": "Trappu/Magnum-Picaro-0.7-v2-12b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.300278815764394
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5506661918828847
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32298657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47271875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35804521276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Trappu/Trappu_Nemo-Picaro-12B/37534f85-e1ae-482b-89d0-480c4bbc50e7.json b/leaderboard_data/HFOpenLLMv2/Trappu/Trappu_Nemo-Picaro-12B/37534f85-e1ae-482b-89d0-480c4bbc50e7.json
deleted file mode 100644
index 42fd4f97aab3d08e28e859b799625f081f409b9d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Trappu/Trappu_Nemo-Picaro-12B/37534f85-e1ae-482b-89d0-480c4bbc50e7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Trappu_Nemo-Picaro-12B/1762652579.92064",
- "retrieved_timestamp": "1762652579.92064",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Trappu/Nemo-Picaro-12B",
- "developer": "Trappu",
- "inference_platform": "unknown",
- "id": "Trappu/Nemo-Picaro-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2577139766929525
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5489586125997546
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08459214501510574
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3271812080536913
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47259375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36045545212765956
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Tremontaine/Tremontaine_L3-12B-Lunaris-v1/51e5f1f2-a43a-4ade-9207-1b15d172ba08.json b/leaderboard_data/HFOpenLLMv2/Tremontaine/Tremontaine_L3-12B-Lunaris-v1/51e5f1f2-a43a-4ade-9207-1b15d172ba08.json
deleted file mode 100644
index b5c065e2f4fd4ddc123d0372742b38358f358436..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Tremontaine/Tremontaine_L3-12B-Lunaris-v1/51e5f1f2-a43a-4ade-9207-1b15d172ba08.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Tremontaine_L3-12B-Lunaris-v1/1762652579.920848",
- "retrieved_timestamp": "1762652579.920848",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Tremontaine/L3-12B-Lunaris-v1",
- "developer": "Tremontaine",
- "inference_platform": "unknown",
- "id": "Tremontaine/L3-12B-Lunaris-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6909311737301471
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5230217237244009
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08761329305135952
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3673645833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3774933510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 11.52
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Annunaki-12b/28f9e91f-b32f-4b8f-ae18-126c7bbe6e7d.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Annunaki-12b/28f9e91f-b32f-4b8f-ae18-126c7bbe6e7d.json
deleted file mode 100644
index 46e556f25034da178890f366fa13853c777339ef..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Annunaki-12b/28f9e91f-b32f-4b8f-ae18-126c7bbe6e7d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Annunaki-12b/1762652579.921084",
- "retrieved_timestamp": "1762652579.921084",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Annunaki-12b",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Annunaki-12b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3872070550583563
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5498969437971782
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1216012084592145
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44087499999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3720910904255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_BigTalker-Lite-8B/befea823-7dc5-4e69-81e3-e75c4ff117ac.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_BigTalker-Lite-8B/befea823-7dc5-4e69-81e3-e75c4ff117ac.json
deleted file mode 100644
index 5a16fb39d85d2ca58f364a9e8979c4a55bb25e0f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_BigTalker-Lite-8B/befea823-7dc5-4e69-81e3-e75c4ff117ac.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_BigTalker-Lite-8B/1762652579.92133",
- "retrieved_timestamp": "1762652579.921331",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/BigTalker-Lite-8B",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/BigTalker-Lite-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3689222374411007
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5308138241234059
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10196374622356495
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42084375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34308510638297873
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Chatty-Harry_V2.0/f2dcc214-e25c-4c73-97f0-4e47304df09b.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Chatty-Harry_V2.0/f2dcc214-e25c-4c73-97f0-4e47304df09b.json
deleted file mode 100644
index 2691536ab97c4170c78a69c855ac7979c81d63d6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Chatty-Harry_V2.0/f2dcc214-e25c-4c73-97f0-4e47304df09b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Chatty-Harry_V2.0/1762652579.921529",
- "retrieved_timestamp": "1762652579.92153",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Chatty-Harry_V2.0",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Chatty-Harry_V2.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3325520729442324
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5318928049062546
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13897280966767372
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32298657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40782291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36826795212765956
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Chatty-Harry_V3.0/b9b23a78-beea-4c4b-8bb8-d5a18a05ffce.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Chatty-Harry_V3.0/b9b23a78-beea-4c4b-8bb8-d5a18a05ffce.json
deleted file mode 100644
index f39117824d2dca87118b4eaa5965426bc69cacf9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Chatty-Harry_V3.0/b9b23a78-beea-4c4b-8bb8-d5a18a05ffce.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Chatty-Harry_V3.0/1762652579.9217439",
- "retrieved_timestamp": "1762652579.9217439",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Chatty-Harry_V3.0",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Chatty-Harry_V3.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36749823800848413
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5526193453608234
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11253776435045318
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32298657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44084375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37017952127659576
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Chronos-Prism_V1.0/13bb7db2-9d89-4dce-950a-14ccfb3492aa.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Chronos-Prism_V1.0/13bb7db2-9d89-4dce-950a-14ccfb3492aa.json
deleted file mode 100644
index 98768898290251cf239ab2034de637748bd54bf7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Chronos-Prism_V1.0/13bb7db2-9d89-4dce-950a-14ccfb3492aa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Chronos-Prism_V1.0/1762652579.921948",
- "retrieved_timestamp": "1762652579.921948",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Chronos-Prism_V1.0",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Chronos-Prism_V1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3259329689667859
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5554188807010064
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12009063444108761
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4262708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36727061170212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_DS-R1-Distill-Q2.5-10B-Harmony/ff136a9d-7e29-4a44-86be-c69bc115102e.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_DS-R1-Distill-Q2.5-10B-Harmony/ff136a9d-7e29-4a44-86be-c69bc115102e.json
deleted file mode 100644
index 99f4ed33516393ea9e2a59af4caa493ad2da6bcf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_DS-R1-Distill-Q2.5-10B-Harmony/ff136a9d-7e29-4a44-86be-c69bc115102e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Distill-Q2.5-10B-Harmony/1762652579.9225988",
- "retrieved_timestamp": "1762652579.9226",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/DS-R1-Distill-Q2.5-10B-Harmony",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/DS-R1-Distill-Q2.5-10B-Harmony"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17508211545366295
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2643276743386568
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2105704697986577
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31276041666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11727061170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 10.366
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_DS-R1-Distill-Q2.5-14B-Harmony_V0.1/63bc0215-741c-48ab-8ce3-d4c036c74a42.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_DS-R1-Distill-Q2.5-14B-Harmony_V0.1/63bc0215-741c-48ab-8ce3-d4c036c74a42.json
deleted file mode 100644
index 58fd8b1a14a913d01a9360cc492f6a6e84d78c74..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_DS-R1-Distill-Q2.5-14B-Harmony_V0.1/63bc0215-741c-48ab-8ce3-d4c036c74a42.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Distill-Q2.5-14B-Harmony_V0.1/1762652579.9228039",
- "retrieved_timestamp": "1762652579.922805",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/DS-R1-Distill-Q2.5-14B-Harmony_V0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4515042309959796
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5783379428926061
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5551359516616314
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3934563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5566875000000001
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4601063829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_DS-R1-Distill-Q2.5-7B-RP/5515e597-5f9f-46eb-8d3f-0482bdd69715.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_DS-R1-Distill-Q2.5-7B-RP/5515e597-5f9f-46eb-8d3f-0482bdd69715.json
deleted file mode 100644
index 47f835353d8e0073626924f8c5fe20852ae4dd3a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_DS-R1-Distill-Q2.5-7B-RP/5515e597-5f9f-46eb-8d3f-0482bdd69715.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_DS-R1-Distill-Q2.5-7B-RP/1762652579.923009",
- "retrieved_timestamp": "1762652579.923009",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/DS-R1-Distill-Q2.5-7B-RP",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/DS-R1-Distill-Q2.5-7B-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34454248061809334
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43834886662348205
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46827794561933533
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40302083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2890625
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Dark-Chivalry_V1.0/ed3b441b-272c-4bc4-8839-aa6055a6ccbc.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Dark-Chivalry_V1.0/ed3b441b-272c-4bc4-8839-aa6055a6ccbc.json
deleted file mode 100644
index c0b531106ed1c09511c865a90483115a2865eb47..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Dark-Chivalry_V1.0/ed3b441b-272c-4bc4-8839-aa6055a6ccbc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Dark-Chivalry_V1.0/1762652579.923868",
- "retrieved_timestamp": "1762652579.923869",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Dark-Chivalry_V1.0",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Dark-Chivalry_V1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4325700253106203
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4974207759950637
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13141993957703926
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4181770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34441489361702127
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Distilled-DarkPlanet-Allades-8B/2d57a30c-8a0e-4f18-bb2d-6bf4536bbc86.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Distilled-DarkPlanet-Allades-8B/2d57a30c-8a0e-4f18-bb2d-6bf4536bbc86.json
deleted file mode 100644
index 47a5d180b443c2ee151329d8b917de7f4b1b0879..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Distilled-DarkPlanet-Allades-8B/2d57a30c-8a0e-4f18-bb2d-6bf4536bbc86.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Distilled-DarkPlanet-Allades-8B/1762652579.9240808",
- "retrieved_timestamp": "1762652579.9240808",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Distilled-DarkPlanet-Allades-8B",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Distilled-DarkPlanet-Allades-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3460163477351206
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4633948672868899
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4003021148036254
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29014295212765956
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Distilled-DarkPlanet-Allades-8B_TIES/9bff68b3-82a4-49b5-90a7-3c0038ddc35a.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Distilled-DarkPlanet-Allades-8B_TIES/9bff68b3-82a4-49b5-90a7-3c0038ddc35a.json
deleted file mode 100644
index 4d82fd62f17242cf6c915feaf1d01ede83532feb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Distilled-DarkPlanet-Allades-8B_TIES/9bff68b3-82a4-49b5-90a7-3c0038ddc35a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Distilled-DarkPlanet-Allades-8B_TIES/1762652579.924282",
- "retrieved_timestamp": "1762652579.924282",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Distilled-DarkPlanet-Allades-8B_TIES",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Distilled-DarkPlanet-Allades-8B_TIES"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3891807071902552
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5041556910813355
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09063444108761329
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145973154362416
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3868020833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.340093085106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Distilled-Whiskey-8b/cf34d222-197f-4d3d-9786-fb5c019f2552.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Distilled-Whiskey-8b/cf34d222-197f-4d3d-9786-fb5c019f2552.json
deleted file mode 100644
index 45895340c177fcb179c768c88cb93c8e6a0ed459..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Distilled-Whiskey-8b/cf34d222-197f-4d3d-9786-fb5c019f2552.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Distilled-Whiskey-8b/1762652579.924494",
- "retrieved_timestamp": "1762652579.9244952",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Distilled-Whiskey-8b",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Distilled-Whiskey-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34476743928332376
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5027820189600739
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2545317220543807
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41721874999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3366855053191489
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Hermes3-L3.1-DirtyHarry-8B/a8086735-c7a7-48b5-9219-829e288040f5.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Hermes3-L3.1-DirtyHarry-8B/a8086735-c7a7-48b5-9219-829e288040f5.json
deleted file mode 100644
index 5ce5d658b60e96605a7b2ed20e0ebea89d542233..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Hermes3-L3.1-DirtyHarry-8B/a8086735-c7a7-48b5-9219-829e288040f5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Hermes3-L3.1-DirtyHarry-8B/1762652579.925645",
- "retrieved_timestamp": "1762652579.925645",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Hermes3-L3.1-DirtyHarry-8B",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Hermes3-L3.1-DirtyHarry-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32423414318452815
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5066388671914118
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07175226586102719
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4068958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3338597074468085
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Herodotos-14B/271dbfc3-d9cf-4cb7-b1c0-175f016ed32b.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Herodotos-14B/271dbfc3-d9cf-4cb7-b1c0-175f016ed32b.json
deleted file mode 100644
index d464b5604518eda494da9fe8acb6cec084c8e7fa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Herodotos-14B/271dbfc3-d9cf-4cb7-b1c0-175f016ed32b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Herodotos-14B/1762652579.925863",
- "retrieved_timestamp": "1762652579.925863",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Herodotos-14B",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Herodotos-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4667415790103592
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6435044367110887
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5045317220543807
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3733221476510067
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4795416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5290059840425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Herodotos-14B_V0.1/3c6d1b1b-465a-4b97-83ed-d2ebd27a905e.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Herodotos-14B_V0.1/3c6d1b1b-465a-4b97-83ed-d2ebd27a905e.json
deleted file mode 100644
index ad2344f28dc846b495bf4eae80925710c7b02e3c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Herodotos-14B_V0.1/3c6d1b1b-465a-4b97-83ed-d2ebd27a905e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Herodotos-14B_V0.1/1762652579.9261289",
- "retrieved_timestamp": "1762652579.926136",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Herodotos-14B_V0.1",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Herodotos-14B_V0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1878715142488597
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30172239497895226
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22399328859060402
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3683854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11643949468085106
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_L3.1-8B-Dusky-Ink/4eed8b1b-591d-403b-96f4-c6db11e8b234.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_L3.1-8B-Dusky-Ink/4eed8b1b-591d-403b-96f4-c6db11e8b234.json
deleted file mode 100644
index fe3d622745726bf064ac319bb9f868a26dba286a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_L3.1-8B-Dusky-Ink/4eed8b1b-591d-403b-96f4-c6db11e8b234.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_L3.1-8B-Dusky-Ink/1762652579.926589",
- "retrieved_timestamp": "1762652579.92659",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/L3.1-8B-Dusky-Ink",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/L3.1-8B-Dusky-Ink"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4529780981130068
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5097902234872148
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12311178247734139
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4223958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36826795212765956
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_L3.1-8B-Dusky-Ink_v0.r1/a43e1d8d-8a9e-445b-9023-fc6d4a41fcfc.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_L3.1-8B-Dusky-Ink_v0.r1/a43e1d8d-8a9e-445b-9023-fc6d4a41fcfc.json
deleted file mode 100644
index 834e51d963189dcd561305c3f6b8d7514818b320..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_L3.1-8B-Dusky-Ink_v0.r1/a43e1d8d-8a9e-445b-9023-fc6d4a41fcfc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_L3.1-8B-Dusky-Ink_v0.r1/1762652579.926839",
- "retrieved_timestamp": "1762652579.92684",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/L3.1-8B-Dusky-Ink_v0.r1",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/L3.1-8B-Dusky-Ink_v0.r1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19848779017451473
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43372778578458115
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04305135951661632
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3988333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.320561835106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_LThreePointOne-8B-HermesBlackroot/d1c3467e-6189-4d6f-bedb-8c51fa8bfde6.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_LThreePointOne-8B-HermesBlackroot/d1c3467e-6189-4d6f-bedb-8c51fa8bfde6.json
deleted file mode 100644
index 5699be2d67390ba35a65fae7cae1bc1b9a58ec5c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_LThreePointOne-8B-HermesBlackroot/d1c3467e-6189-4d6f-bedb-8c51fa8bfde6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_LThreePointOne-8B-HermesBlackroot/1762652579.927087",
- "retrieved_timestamp": "1762652579.927088",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/LThreePointOne-8B-HermesBlackroot",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/LThreePointOne-8B-HermesBlackroot"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17920340252751588
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4998333246909241
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.019637462235649546
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3585520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32845744680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_LThreePointOne-8B-HermesInk/1bb3c61f-2f72-4486-87ef-1e6d5ce58478.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_LThreePointOne-8B-HermesInk/1bb3c61f-2f72-4486-87ef-1e6d5ce58478.json
deleted file mode 100644
index 8d6579eb91d8d63c9a0ff1c645e438b0cc71e24e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_LThreePointOne-8B-HermesInk/1bb3c61f-2f72-4486-87ef-1e6d5ce58478.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_LThreePointOne-8B-HermesInk/1762652579.927316",
- "retrieved_timestamp": "1762652579.927316",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/LThreePointOne-8B-HermesInk",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/LThreePointOne-8B-HermesInk"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4031192790684273
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5222765555856439
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17220543806646527
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32298657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4129375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34674202127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-1.5b/26810cc0-541f-4ca5-b76e-f1a63baa61f6.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-1.5b/26810cc0-541f-4ca5-b76e-f1a63baa61f6.json
deleted file mode 100644
index 7490e407426635af5d0c88791e821d9cec37b2e5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-1.5b/26810cc0-541f-4ca5-b76e-f1a63baa61f6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-1.5b/1762652579.9280179",
- "retrieved_timestamp": "1762652579.9280179",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Minerva-1.5b",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Minerva-1.5b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2694295580171722
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4025709779119226
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1027190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3655
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.269780585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-1.5b_V0.2/fc5be34b-0fad-4fce-9df1-851e4fd3119d.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-1.5b_V0.2/fc5be34b-0fad-4fce-9df1-851e4fd3119d.json
deleted file mode 100644
index 48daaee8eac1ae7c7cb2a9c1a6f0263f27751e5b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-1.5b_V0.2/fc5be34b-0fad-4fce-9df1-851e4fd3119d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-1.5b_V0.2/1762652579.928302",
- "retrieved_timestamp": "1762652579.928303",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Minerva-1.5b_V0.2",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Minerva-1.5b_V0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3083474071020448
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3989042137094949
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11404833836858005
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3960104166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29105718085106386
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-10b/848ac6f9-2bb5-48fe-821a-83f28da91f92.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-10b/848ac6f9-2bb5-48fe-821a-83f28da91f92.json
deleted file mode 100644
index b3a7843e6544aafa353e2c03b3e15b8062078a73..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-10b/848ac6f9-2bb5-48fe-821a-83f28da91f92.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-10b/1762652579.928542",
- "retrieved_timestamp": "1762652579.928543",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Minerva-10b",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Minerva-10b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1878715142488597
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4462036157096501
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36270833333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23179853723404256
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 10.067
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-14b-V0.1/fc4971f4-983d-40f9-810a-16ed998c1dad.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-14b-V0.1/fc4971f4-983d-40f9-810a-16ed998c1dad.json
deleted file mode 100644
index ed936019ccdad742e7ed9497fd7a61fea930715b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-14b-V0.1/fc4971f4-983d-40f9-810a-16ed998c1dad.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-14b-V0.1/1762652579.92906",
- "retrieved_timestamp": "1762652579.9290612",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Minerva-14b-V0.1",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Minerva-14b-V0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0861292481726264
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6089792638423274
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30513595166163143
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36577181208053694
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47002083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5118018617021277
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-14b/54093f2d-15c3-465e-b876-5e4027deeb19.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-14b/54093f2d-15c3-465e-b876-5e4027deeb19.json
deleted file mode 100644
index 81a09e96cd29d270a39fb0ded74027dc508cc63b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-14b/54093f2d-15c3-465e-b876-5e4027deeb19.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-14b/1762652579.928819",
- "retrieved_timestamp": "1762652579.928819",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Minerva-14b",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Minerva-14b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3467898509288687
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6300829439447851
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30513595166163143
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37416107382550334
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.476625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5193650265957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-7b/aad7ed5c-d51d-46d7-af15-9c0447a02036.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-7b/aad7ed5c-d51d-46d7-af15-9c0447a02036.json
deleted file mode 100644
index fc72c1ba671f71eef2e63cf6216d4ad2e5b898e6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-7b/aad7ed5c-d51d-46d7-af15-9c0447a02036.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-7b/1762652579.929375",
- "retrieved_timestamp": "1762652579.929377",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Minerva-7b",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Minerva-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3724196243744376
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5498400501314606
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.283987915407855
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32298657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4143333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44439827127659576
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-8b/08cc58ae-b1dc-489c-ba25-338bb11db2ee.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-8b/08cc58ae-b1dc-489c-ba25-338bb11db2ee.json
deleted file mode 100644
index 9f68b562face6adb6fdceedf74ac9820b33565ab..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Minerva-8b/08cc58ae-b1dc-489c-ba25-338bb11db2ee.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Minerva-8b/1762652579.9296892",
- "retrieved_timestamp": "1762652579.9296901",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Minerva-8b",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Minerva-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17208451353519771
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46686093526780637
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.004531722054380665
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4272916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30892619680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Pans_Gutenbergum_V0.1/2a6af60c-eb46-46ae-8140-d050b48069ae.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Pans_Gutenbergum_V0.1/2a6af60c-eb46-46ae-8140-d050b48069ae.json
deleted file mode 100644
index 49cf2abcdd1b6e1d151aca754332a2ffd6363e0a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Pans_Gutenbergum_V0.1/2a6af60c-eb46-46ae-8140-d050b48069ae.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Pans_Gutenbergum_V0.1/1762652579.9304042",
- "retrieved_timestamp": "1762652579.9304051",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Pans_Gutenbergum_V0.1",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Pans_Gutenbergum_V0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.309696050922663
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5541091780465247
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10574018126888217
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32298657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4528125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3696808510638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Pans_Gutenbergum_V0.2/f9eef8a7-1f23-46f1-b57a-062ffd1b81a1.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Pans_Gutenbergum_V0.2/f9eef8a7-1f23-46f1-b57a-062ffd1b81a1.json
deleted file mode 100644
index 495dc3a495d9c25bda6498f5362fff7c10c2f8d1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Pans_Gutenbergum_V0.2/f9eef8a7-1f23-46f1-b57a-062ffd1b81a1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Pans_Gutenbergum_V0.2/1762652579.93062",
- "retrieved_timestamp": "1762652579.930621",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Pans_Gutenbergum_V0.2",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Pans_Gutenbergum_V0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3215113676157041
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.55257930562769
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06873111782477341
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46732291666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3585438829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Pantheon_ChatWaifu_V0.2/b57a86fa-8994-4004-a79d-d6da64e64b4d.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Pantheon_ChatWaifu_V0.2/b57a86fa-8994-4004-a79d-d6da64e64b4d.json
deleted file mode 100644
index 1de2f54d875b78db58c6431379f87e1dbde29d2d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Pantheon_ChatWaifu_V0.2/b57a86fa-8994-4004-a79d-d6da64e64b4d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Pantheon_ChatWaifu_V0.2/1762652579.930828",
- "retrieved_timestamp": "1762652579.930829",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Pantheon_ChatWaifu_V0.2",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Pantheon_ChatWaifu_V0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2682803849341968
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5531574435698693
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05664652567975831
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179530201342282
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47551041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34424867021276595
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-14B-Instruct-1M-Harmony/1cf0506b-dbdd-4f7e-abf5-d812763a722e.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-14B-Instruct-1M-Harmony/1cf0506b-dbdd-4f7e-abf5-d812763a722e.json
deleted file mode 100644
index 1ca25d444c1b6e96578cd5ae21618ed6a865c0cd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-14B-Instruct-1M-Harmony/1cf0506b-dbdd-4f7e-abf5-d812763a722e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-14B-Instruct-1M-Harmony/1762652579.93199",
- "retrieved_timestamp": "1762652579.931991",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Q2.5-14B-Instruct-1M-Harmony",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Q2.5-14B-Instruct-1M-Harmony"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5986327389105351
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6338808682301471
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3768882175226586
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.375
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4795416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5074800531914894
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-AthensCOT/54a29a68-c69a-4b49-a87a-cb93c459146a.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-AthensCOT/54a29a68-c69a-4b49-a87a-cb93c459146a.json
deleted file mode 100644
index eea53246e040c4b318766d165a8a8de061b4e2df..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-AthensCOT/54a29a68-c69a-4b49-a87a-cb93c459146a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-AthensCOT/1762652579.9322",
- "retrieved_timestamp": "1762652579.932201",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Q2.5-AthensCOT",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Q2.5-AthensCOT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45727447616767947
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5541692533534606
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29154078549848944
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4578333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4379155585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-CodeR1-3B/74342d21-8eac-494c-95b9-4df1e828473b.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-CodeR1-3B/74342d21-8eac-494c-95b9-4df1e828473b.json
deleted file mode 100644
index dc60e50d3bbcd2ba070057e3ec019d1dae01fb37..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-CodeR1-3B/74342d21-8eac-494c-95b9-4df1e828473b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-CodeR1-3B/1762652579.932402",
- "retrieved_timestamp": "1762652579.9324028",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Q2.5-CodeR1-3B",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Q2.5-CodeR1-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35875587884590665
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4660844324968853
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16389728096676737
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43154166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978723404255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.085
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-EVACOT-7b/972dfbcf-a5d0-4f9f-a39c-089c30ac91ab.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-EVACOT-7b/972dfbcf-a5d0-4f9f-a39c-089c30ac91ab.json
deleted file mode 100644
index 6c3614d4c6bba52e1744008f15295fba0ff03d77..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-EVACOT-7b/972dfbcf-a5d0-4f9f-a39c-089c30ac91ab.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-EVACOT-7b/1762652579.9326148",
- "retrieved_timestamp": "1762652579.932616",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Q2.5-EVACOT-7b",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Q2.5-EVACOT-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5784241368457914
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5505524946794311
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2824773413897281
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179530201342282
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4498645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43309507978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-EvaHumane-RP/5146b3c9-9fdb-4a4e-a687-4bcf44b92309.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-EvaHumane-RP/5146b3c9-9fdb-4a4e-a687-4bcf44b92309.json
deleted file mode 100644
index 88a46147c893f5e8d3db6058403edfbb9c1a0c4c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-EvaHumane-RP/5146b3c9-9fdb-4a4e-a687-4bcf44b92309.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-EvaHumane-RP/1762652579.932837",
- "retrieved_timestamp": "1762652579.932837",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Q2.5-EvaHumane-RP",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Q2.5-EvaHumane-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3676234613048932
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5328196297646768
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29229607250755285
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3187919463087248
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42763541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4412400265957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-Humane-RP/697ad115-9040-42e4-b94b-529ab27011ee.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-Humane-RP/697ad115-9040-42e4-b94b-529ab27011ee.json
deleted file mode 100644
index 1790025ec80f76251224031bcc670f6d908fd7a3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-Humane-RP/697ad115-9040-42e4-b94b-529ab27011ee.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-Humane-RP/1762652579.933056",
- "retrieved_timestamp": "1762652579.933057",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Q2.5-Humane-RP",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Q2.5-Humane-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4411627814199657
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5649289292164736
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3391238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3187919463087248
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4528125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44921875
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-Instruct-1M_Harmony/f4cbe998-8c9f-47c1-a267-5831a40e4cf2.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-Instruct-1M_Harmony/f4cbe998-8c9f-47c1-a267-5831a40e4cf2.json
deleted file mode 100644
index b384db78eea43ec8e0c2d39c36abbc1f82215e03..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-Instruct-1M_Harmony/f4cbe998-8c9f-47c1-a267-5831a40e4cf2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-Instruct-1M_Harmony/1762652579.933266",
- "retrieved_timestamp": "1762652579.9332669",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Q2.5-Instruct-1M_Harmony",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Q2.5-Instruct-1M_Harmony"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6038034636985421
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5373243549676157
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3323262839879154
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32298657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46878125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43658577127659576
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-R1-3B/a4e4a936-5203-4a9d-a698-417cc9da866f.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-R1-3B/a4e4a936-5203-4a9d-a698-417cc9da866f.json
deleted file mode 100644
index 1dc59aa094cf5c7a7736a91e92117610a0b64986..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-R1-3B/a4e4a936-5203-4a9d-a698-417cc9da866f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-R1-3B/1762652579.933473",
- "retrieved_timestamp": "1762652579.933474",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Q2.5-R1-3B",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Q2.5-R1-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4213542290012722
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48124304786769817
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2673716012084592
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43197916666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38131648936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.085
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-R1-7B/302fa968-5d2d-4750-a1e6-c87534c1eafa.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-R1-7B/302fa968-5d2d-4750-a1e6-c87534c1eafa.json
deleted file mode 100644
index 317443a11b4365affa9f8a58aba4f8ce83fae67c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Q2.5-R1-7B/302fa968-5d2d-4750-a1e6-c87534c1eafa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Q2.5-R1-7B/1762652579.933674",
- "retrieved_timestamp": "1762652579.933675",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Q2.5-R1-7B",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Q2.5-R1-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1346150436397647
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30065625818799685
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01661631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2525167785234899
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3607291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1180186170212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Robo-Gutenberg_V1.0/d891d79a-1ec2-44e3-83cd-c28739aecd6e.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Robo-Gutenberg_V1.0/d891d79a-1ec2-44e3-83cd-c28739aecd6e.json
deleted file mode 100644
index 9eda526cd71f1ea1d24e1d379bb3890c2b78a33c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Robo-Gutenberg_V1.0/d891d79a-1ec2-44e3-83cd-c28739aecd6e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Robo-Gutenberg_V1.0/1762652579.9338748",
- "retrieved_timestamp": "1762652579.933876",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Robo-Gutenberg_V1.0",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Robo-Gutenberg_V1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6007559940956662
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.653716560941194
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4561933534743202
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3859060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47436458333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5391456117021277
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Rocinante-Prism_V2.0/9f32b229-a2d5-409b-98d2-65681616aff4.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Rocinante-Prism_V2.0/9f32b229-a2d5-409b-98d2-65681616aff4.json
deleted file mode 100644
index 11ce2471dc3ae48656a6e31f192f410d24bfc5fa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Rocinante-Prism_V2.0/9f32b229-a2d5-409b-98d2-65681616aff4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Rocinante-Prism_V2.0/1762652579.9340868",
- "retrieved_timestamp": "1762652579.9340868",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Rocinante-Prism_V2.0",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Rocinante-Prism_V2.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2616103051015749
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5361246041982355
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11102719033232629
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.445
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3640292553191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Rocinante-Prism_V2.1/7a93ddc1-8694-4b16-8183-1b7f46dfba92.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Rocinante-Prism_V2.1/7a93ddc1-8694-4b16-8183-1b7f46dfba92.json
deleted file mode 100644
index 69ffcd9e49e42bb3d96cc2bb5e72d6203f4451b5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Rocinante-Prism_V2.1/7a93ddc1-8694-4b16-8183-1b7f46dfba92.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Rocinante-Prism_V2.1/1762652579.934289",
- "retrieved_timestamp": "1762652579.93429",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Rocinante-Prism_V2.1",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Rocinante-Prism_V2.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25584005992987496
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5332676401860506
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11253776435045318
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44896874999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3651097074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Rombos-Novasky-7B_V1c/a06dc6ef-5d16-402a-a855-b7feec423aa5.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Rombos-Novasky-7B_V1c/a06dc6ef-5d16-402a-a855-b7feec423aa5.json
deleted file mode 100644
index 865c5a8690015a054b935a2ce0f99b9348df7d9e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Rombos-Novasky-7B_V1c/a06dc6ef-5d16-402a-a855-b7feec423aa5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Rombos-Novasky-7B_V1c/1762652579.934721",
- "retrieved_timestamp": "1762652579.934722",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Rombos-Novasky-7B_V1c",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Rombos-Novasky-7B_V1c"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40801517750679306
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4349247829177707
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08534743202416918
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44645833333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27376994680851063
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Set-70b/e25fa684-c237-4bce-8498-7bdfaac970a9.json b/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Set-70b/e25fa684-c237-4bce-8498-7bdfaac970a9.json
deleted file mode 100644
index 5bda0314aeb1192647aebf0f63e222a55a27fce0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Triangle104/Triangle104_Set-70b/e25fa684-c237-4bce-8498-7bdfaac970a9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Set-70b/1762652579.934931",
- "retrieved_timestamp": "1762652579.934931",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Set-70b",
- "developer": "Triangle104",
- "inference_platform": "unknown",
- "id": "Triangle104/Set-70b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7642954028643998
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.70142939330013
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3640483383685801
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4463087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46956250000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5442154255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-0.5-7B-Instruct/df3de449-9abc-4f0a-ba6e-caa48720893a.json b/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-0.5-7B-Instruct/df3de449-9abc-4f0a-ba6e-caa48720893a.json
deleted file mode 100644
index 349433bf13f42b5e35d56ab6dd47c80a3ff8b412..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-0.5-7B-Instruct/df3de449-9abc-4f0a-ba6e-caa48720893a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-0.5-7B-Instruct/1762652579.935141",
- "retrieved_timestamp": "1762652579.9351418",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Tsunami-th/Tsunami-0.5-7B-Instruct",
- "developer": "Tsunami-th",
- "inference_platform": "unknown",
- "id": "Tsunami-th/Tsunami-0.5-7B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7400153814102137
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.552369427738073
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5045317220543807
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42571875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44132313829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-0.5x-7B-Instruct/fec678b9-c51b-4945-8d4f-f06af6528227.json b/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-0.5x-7B-Instruct/fec678b9-c51b-4945-8d4f-f06af6528227.json
deleted file mode 100644
index ba92c73ae7ad3ff0b3c71b1d8d194dd325c897da..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-0.5x-7B-Instruct/fec678b9-c51b-4945-8d4f-f06af6528227.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-0.5x-7B-Instruct/1762652579.9353971",
- "retrieved_timestamp": "1762652579.9353979",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Tsunami-th/Tsunami-0.5x-7B-Instruct",
- "developer": "Tsunami-th",
- "inference_platform": "unknown",
- "id": "Tsunami-th/Tsunami-0.5x-7B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.709915247099917
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5592865858560252
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4206948640483384
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145973154362416
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46667708333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44581117021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-1.0-14B-Instruct/11262698-480b-425b-b013-f362fae2f254.json b/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-1.0-14B-Instruct/11262698-480b-425b-b013-f362fae2f254.json
deleted file mode 100644
index 6bd0f1fad4e366cfd518280be0038f23c6e62069..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-1.0-14B-Instruct/11262698-480b-425b-b013-f362fae2f254.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-1.0-14B-Instruct/1762652579.935597",
- "retrieved_timestamp": "1762652579.935597",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Tsunami-th/Tsunami-1.0-14B-Instruct",
- "developer": "Tsunami-th",
- "inference_platform": "unknown",
- "id": "Tsunami-th/Tsunami-1.0-14B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7829049145157072
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6438763263011559
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45845921450151056
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3565436241610738
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44593750000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5248503989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-1.0-7B-Instruct/ccffe03b-c166-48de-8516-8253b2c2f96e.json b/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-1.0-7B-Instruct/ccffe03b-c166-48de-8516-8253b2c2f96e.json
deleted file mode 100644
index 8ee9ddac452cb1ee0b4b871132b68061c13e0b83..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Tsunami-th/Tsunami-th_Tsunami-1.0-7B-Instruct/ccffe03b-c166-48de-8516-8253b2c2f96e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Tsunami-th_Tsunami-1.0-7B-Instruct/1762652579.9358132",
- "retrieved_timestamp": "1762652579.9358132",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Tsunami-th/Tsunami-1.0-7B-Instruct",
- "developer": "Tsunami-th",
- "inference_platform": "unknown",
- "id": "Tsunami-th/Tsunami-1.0-7B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.730872972601586
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.549071195618326
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4335347432024169
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44928125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4424035904255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter1/07af3512-a045-435e-a965-8daa0836905d.json b/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter1/07af3512-a045-435e-a965-8daa0836905d.json
deleted file mode 100644
index 65059a138c42a4fba93b2b17d1b3036afc754665..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter1/07af3512-a045-435e-a965-8daa0836905d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter1/1762652579.9367309",
- "retrieved_timestamp": "1762652579.9367318",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1",
- "developer": "UCLA-AGI",
- "inference_platform": "unknown",
- "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7298988904994304
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5057890691082708
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1148036253776435
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3567916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37109375
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter2/0c5c315f-63c4-427e-a307-1422a197895c.json b/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter2/0c5c315f-63c4-427e-a307-1422a197895c.json
deleted file mode 100644
index 6a10c1cf09da116c3340b81d9c5953bf9865437e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter2/0c5c315f-63c4-427e-a307-1422a197895c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter2/1762652579.93697",
- "retrieved_timestamp": "1762652579.936971",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2",
- "developer": "UCLA-AGI",
- "inference_platform": "unknown",
- "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6988745417713889
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5088696278852957
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10347432024169184
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35942708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36918218085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/49e095af-ed90-4e64-b476-4fc62d6e6997.json b/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/49e095af-ed90-4e64-b476-4fc62d6e6997.json
deleted file mode 100644
index a16ae4e325bd5cba6d58ecf4ecee6921feb49f4a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/49e095af-ed90-4e64-b476-4fc62d6e6997.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/1762652579.937367",
- "retrieved_timestamp": "1762652579.9373682",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3",
- "developer": "UCLA-AGI",
- "inference_platform": "unknown",
- "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.67029814226253
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5076407742830437
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07175226586102719
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3647291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3657746010638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/d8d05a10-8889-40aa-b56f-365e0a12052c.json b/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/d8d05a10-8889-40aa-b56f-365e0a12052c.json
deleted file mode 100644
index 595b41baa3825024472c4b8305e2492eda9b1bc4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/UCLA-AGI/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/d8d05a10-8889-40aa-b56f-365e0a12052c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/UCLA-AGI_Llama-3-Instruct-8B-SPPO-Iter3/1762652579.937166",
- "retrieved_timestamp": "1762652579.9371672",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3",
- "developer": "UCLA-AGI",
- "inference_platform": "unknown",
- "id": "UCLA-AGI/Llama-3-Instruct-8B-SPPO-Iter3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6834122350917787
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.50795799761689
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09592145015105741
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36606249999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3644448138297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Undi95/Undi95_MG-FinalMix-72B/3d3598fa-4b23-4ec6-a010-fb20232a5121.json b/leaderboard_data/HFOpenLLMv2/Undi95/Undi95_MG-FinalMix-72B/3d3598fa-4b23-4ec6-a010-fb20232a5121.json
deleted file mode 100644
index 4e217e7075b583a2f155531b0d2b8a05dbcda377..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Undi95/Undi95_MG-FinalMix-72B/3d3598fa-4b23-4ec6-a010-fb20232a5121.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Undi95_MG-FinalMix-72B/1762652579.938925",
- "retrieved_timestamp": "1762652579.938925",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Undi95/MG-FinalMix-72B",
- "developer": "Undi95",
- "inference_platform": "unknown",
- "id": "Undi95/MG-FinalMix-72B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8013648231137825
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6973017446417747
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3972809667673716
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3850671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48227083333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.542719414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/V3N0M/V3N0M_Jenna-Tiny-2.0/d9785857-b164-4d38-8d03-0e03e2d0fbf5.json b/leaderboard_data/HFOpenLLMv2/V3N0M/V3N0M_Jenna-Tiny-2.0/d9785857-b164-4d38-8d03-0e03e2d0fbf5.json
deleted file mode 100644
index e54b54f28fedb19d180ce28c61aaf3dc2608127f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/V3N0M/V3N0M_Jenna-Tiny-2.0/d9785857-b164-4d38-8d03-0e03e2d0fbf5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/V3N0M_Jenna-Tiny-2.0/1762652579.9394162",
- "retrieved_timestamp": "1762652579.9394171",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "V3N0M/Jenna-Tiny-2.0",
- "developer": "V3N0M",
- "inference_platform": "unknown",
- "id": "V3N0M/Jenna-Tiny-2.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2309361383351729
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31479264061817097
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.012084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33666666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1146941489361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.631
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3-SauerkrautLM-70b-Instruct/eb8adbdf-2cfb-4e9e-8f75-ce2734907725.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3-SauerkrautLM-70b-Instruct/eb8adbdf-2cfb-4e9e-8f75-ce2734907725.json
deleted file mode 100644
index 4f9743b7fa9f1a1deb2a6aa0e2f1687aa7882573..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3-SauerkrautLM-70b-Instruct/eb8adbdf-2cfb-4e9e-8f75-ce2734907725.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3-SauerkrautLM-70b-Instruct/1762652579.939689",
- "retrieved_timestamp": "1762652579.939689",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct",
- "developer": "VAGOsolutions",
- "inference_platform": "unknown",
- "id": "VAGOsolutions/Llama-3-SauerkrautLM-70b-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8044621604010691
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6663247245334951
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2280966767371601
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43393750000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5392287234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3-SauerkrautLM-8b-Instruct/ad99531d-4d52-4175-8ebd-cb172b4577de.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3-SauerkrautLM-8b-Instruct/ad99531d-4d52-4175-8ebd-cb172b4577de.json
deleted file mode 100644
index 48507fccb9c754bb3954b304c398e8d8fe1297cf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3-SauerkrautLM-8b-Instruct/ad99531d-4d52-4175-8ebd-cb172b4577de.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3-SauerkrautLM-8b-Instruct/1762652579.93995",
- "retrieved_timestamp": "1762652579.9399512",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct",
- "developer": "VAGOsolutions",
- "inference_platform": "unknown",
- "id": "VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.744536718130117
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.494337579362695
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42410416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3857214095744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3.1-SauerkrautLM-70b-Instruct/2e3eca4b-4c15-4b3b-8c44-3a23312a0797.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3.1-SauerkrautLM-70b-Instruct/2e3eca4b-4c15-4b3b-8c44-3a23312a0797.json
deleted file mode 100644
index c13a18d5fe24279d5d6db5a2c12507f2cc870077..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3.1-SauerkrautLM-70b-Instruct/2e3eca4b-4c15-4b3b-8c44-3a23312a0797.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3.1-SauerkrautLM-70b-Instruct/1762652579.940237",
- "retrieved_timestamp": "1762652579.940238",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct",
- "developer": "VAGOsolutions",
- "inference_platform": "unknown",
- "id": "VAGOsolutions/Llama-3.1-SauerkrautLM-70b-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8656365111238181
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7006249194404001
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3693353474320242
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3414429530201342
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4710833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5334940159574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3.1-SauerkrautLM-8b-Instruct/aa425d3e-e363-46bf-a5fb-cbf524657e85.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3.1-SauerkrautLM-8b-Instruct/aa425d3e-e363-46bf-a5fb-cbf524657e85.json
deleted file mode 100644
index b4825eef4ac6ab963192865641ebcf014c0934d4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_Llama-3.1-SauerkrautLM-8b-Instruct/aa425d3e-e363-46bf-a5fb-cbf524657e85.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/VAGOsolutions_Llama-3.1-SauerkrautLM-8b-Instruct/1762652579.9404852",
- "retrieved_timestamp": "1762652579.940486",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct",
- "developer": "VAGOsolutions",
- "inference_platform": "unknown",
- "id": "VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8017393848322452
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5114932190011187
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19410876132930513
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4148020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3890458776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-1.5b/22ae39ae-883c-43a7-abbe-3213b9035b58.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-1.5b/22ae39ae-883c-43a7-abbe-3213b9035b58.json
deleted file mode 100644
index 5bf53c22c027b1715974abb40941be34ff875a88..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-1.5b/22ae39ae-883c-43a7-abbe-3213b9035b58.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-1.5b/1762652579.940706",
- "retrieved_timestamp": "1762652579.940707",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "VAGOsolutions/SauerkrautLM-1.5b",
- "developer": "VAGOsolutions",
- "inference_platform": "unknown",
- "id": "VAGOsolutions/SauerkrautLM-1.5b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24040324117785256
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3703912164863146
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03625377643504532
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37390625000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21509308510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-7b-HerO/be74b2d6-28b9-4227-b0ec-fbad4b7dada6.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-7b-HerO/be74b2d6-28b9-4227-b0ec-fbad4b7dada6.json
deleted file mode 100644
index 8f4b3ee9f12e691bf9c603cfe2f41d507be3e541..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-7b-HerO/be74b2d6-28b9-4227-b0ec-fbad4b7dada6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-7b-HerO/1762652579.940931",
- "retrieved_timestamp": "1762652579.940931",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "VAGOsolutions/SauerkrautLM-7b-HerO",
- "developer": "VAGOsolutions",
- "inference_platform": "unknown",
- "id": "VAGOsolutions/SauerkrautLM-7b-HerO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.534610389322553
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49044349935812964
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03927492447129909
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39238541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30460438829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-7b-LaserChat/35512aeb-611a-46a8-849e-442fc3fcc23a.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-7b-LaserChat/35512aeb-611a-46a8-849e-442fc3fcc23a.json
deleted file mode 100644
index 1dbb1a06d9bb57ab6dfd054808f8489d2434f68b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-7b-LaserChat/35512aeb-611a-46a8-849e-442fc3fcc23a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-7b-LaserChat/1762652579.941142",
- "retrieved_timestamp": "1762652579.941143",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "VAGOsolutions/SauerkrautLM-7b-LaserChat",
- "developer": "VAGOsolutions",
- "inference_platform": "unknown",
- "id": "VAGOsolutions/SauerkrautLM-7b-LaserChat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5987823419637672
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45432707993295685
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07779456193353475
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4148020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3304521276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-Mixtral-8x7B-Instruct/f105fe57-632a-4e3b-bbcb-f063f2e10874.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-Mixtral-8x7B-Instruct/f105fe57-632a-4e3b-bbcb-f063f2e10874.json
deleted file mode 100644
index 62a26c69bf043538ed09090010e094a1df4f0618..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-Mixtral-8x7B-Instruct/f105fe57-632a-4e3b-bbcb-f063f2e10874.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Mixtral-8x7B-Instruct/1762652579.9418082",
- "retrieved_timestamp": "1762652579.941809",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct",
- "developer": "VAGOsolutions",
- "inference_platform": "unknown",
- "id": "VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5601891869129465
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5277342269858817
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09818731117824774
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42041666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3650265957446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 46.703
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-Nemo-12b-Instruct/b5db7846-f777-4fa8-86e9-f09fdee1dfee.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-Nemo-12b-Instruct/b5db7846-f777-4fa8-86e9-f09fdee1dfee.json
deleted file mode 100644
index bab93b0c74c3ece945682eb9fd66c4e9d3a45dfd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-Nemo-12b-Instruct/b5db7846-f777-4fa8-86e9-f09fdee1dfee.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Nemo-12b-Instruct/1762652579.942016",
- "retrieved_timestamp": "1762652579.942017",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct",
- "developer": "VAGOsolutions",
- "inference_platform": "unknown",
- "id": "VAGOsolutions/SauerkrautLM-Nemo-12b-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6112969144093228
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5214128647611115
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12235649546827794
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4468958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33851396276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-SOLAR-Instruct/24fbb409-3b1a-4ed2-8866-547a7f02c5dc.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-SOLAR-Instruct/24fbb409-3b1a-4ed2-8866-547a7f02c5dc.json
deleted file mode 100644
index faedb1cc8e9789d28f39316b02015d765fa6bd92..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-SOLAR-Instruct/24fbb409-3b1a-4ed2-8866-547a7f02c5dc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-SOLAR-Instruct/1762652579.942544",
- "retrieved_timestamp": "1762652579.942544",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "VAGOsolutions/SauerkrautLM-SOLAR-Instruct",
- "developer": "VAGOsolutions",
- "inference_platform": "unknown",
- "id": "VAGOsolutions/SauerkrautLM-SOLAR-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49172085621705963
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5169447300097646
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0634441087613293
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3965416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31831781914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.732
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-v2-14b-DPO/e4b13fb1-11c0-4696-856f-de393fe2f8b2.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-v2-14b-DPO/e4b13fb1-11c0-4696-856f-de393fe2f8b2.json
deleted file mode 100644
index 4c867ce645843148bb36d42e402fc527e9e39b66..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-v2-14b-DPO/e4b13fb1-11c0-4696-856f-de393fe2f8b2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-v2-14b-DPO/1762652579.943197",
- "retrieved_timestamp": "1762652579.943197",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "VAGOsolutions/SauerkrautLM-v2-14b-DPO",
- "developer": "VAGOsolutions",
- "inference_platform": "unknown",
- "id": "VAGOsolutions/SauerkrautLM-v2-14b-DPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7411645544931892
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6560374350756156
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3164652567975831
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43746875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.51171875
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-v2-14b-SFT/d1b47391-f36e-4819-8093-5aff774dff94.json b/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-v2-14b-SFT/d1b47391-f36e-4819-8093-5aff774dff94.json
deleted file mode 100644
index 8d1efc2ca43a0939345d9731db493cb7f238ec80..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/VAGOsolutions/VAGOsolutions_SauerkrautLM-v2-14b-SFT/d1b47391-f36e-4819-8093-5aff774dff94.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-v2-14b-SFT/1762652579.94341",
- "retrieved_timestamp": "1762652579.9434109",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "VAGOsolutions/SauerkrautLM-v2-14b-SFT",
- "developer": "VAGOsolutions",
- "inference_platform": "unknown",
- "id": "VAGOsolutions/SauerkrautLM-v2-14b-SFT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6948529900663573
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6210355880693049
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3285498489425982
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33557046979865773
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.417875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5205285904255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Vikhrmodels/Vikhrmodels_Vikhr-Llama3.1-8B-Instruct-R-21-09-24/b0332107-4b84-4c0a-b488-187fb3d534ae.json b/leaderboard_data/HFOpenLLMv2/Vikhrmodels/Vikhrmodels_Vikhr-Llama3.1-8B-Instruct-R-21-09-24/b0332107-4b84-4c0a-b488-187fb3d534ae.json
deleted file mode 100644
index da902168e0de2e5579aa14d26a56a08d210a0f48..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Vikhrmodels/Vikhrmodels_Vikhr-Llama3.1-8B-Instruct-R-21-09-24/b0332107-4b84-4c0a-b488-187fb3d534ae.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Vikhrmodels_Vikhr-Llama3.1-8B-Instruct-R-21-09-24/1762652579.9476302",
- "retrieved_timestamp": "1762652579.9476311",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24",
- "developer": "Vikhrmodels",
- "inference_platform": "unknown",
- "id": "Vikhrmodels/Vikhr-Llama3.1-8B-Instruct-R-21-09-24"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.643145742186288
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.527224269970207
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2175226586102719
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24496644295302014
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3753958333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3547207446808511
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Vikhrmodels/Vikhrmodels_Vikhr-Nemo-12B-Instruct-R-21-09-24/787cc582-61da-4afd-bfac-431377809fd9.json b/leaderboard_data/HFOpenLLMv2/Vikhrmodels/Vikhrmodels_Vikhr-Nemo-12B-Instruct-R-21-09-24/787cc582-61da-4afd-bfac-431377809fd9.json
deleted file mode 100644
index a299a4275539c1bdf95a3cb10bb5e6b43e1605f6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Vikhrmodels/Vikhrmodels_Vikhr-Nemo-12B-Instruct-R-21-09-24/787cc582-61da-4afd-bfac-431377809fd9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Vikhrmodels_Vikhr-Nemo-12B-Instruct-R-21-09-24/1762652579.947979",
- "retrieved_timestamp": "1762652579.94798",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24",
- "developer": "Vikhrmodels",
- "inference_platform": "unknown",
- "id": "Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5999315150467426
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5212309052827618
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1714501510574018
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40730208333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33976063829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_Bagel-Hermes-2x34B/5b614673-6566-4b82-bf7c-13268ebb1577.json b/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_Bagel-Hermes-2x34B/5b614673-6566-4b82-bf7c-13268ebb1577.json
deleted file mode 100644
index adbffd722342f7fcc5940dec24e32fe7af8c3234..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_Bagel-Hermes-2x34B/5b614673-6566-4b82-bf7c-13268ebb1577.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Weyaxi_Bagel-Hermes-2x34B/1762652579.948213",
- "retrieved_timestamp": "1762652579.948214",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Weyaxi/Bagel-Hermes-2x34B",
- "developer": "Weyaxi",
- "inference_platform": "unknown",
- "id": "Weyaxi/Bagel-Hermes-2x34B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5431532777474878
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49166555632285514
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45166666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4588597074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 60.814
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_Bagel-Hermes-34B-Slerp/28439ab5-0e5f-4dae-a98a-e0c1b743a8b0.json b/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_Bagel-Hermes-34B-Slerp/28439ab5-0e5f-4dae-a98a-e0c1b743a8b0.json
deleted file mode 100644
index 3336bed36967d1a31d963c49a32ff1c10dced888..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_Bagel-Hermes-34B-Slerp/28439ab5-0e5f-4dae-a98a-e0c1b743a8b0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Weyaxi_Bagel-Hermes-34B-Slerp/1762652579.948482",
- "retrieved_timestamp": "1762652579.948482",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Weyaxi/Bagel-Hermes-34B-Slerp",
- "developer": "Weyaxi",
- "inference_platform": "unknown",
- "id": "Weyaxi/Bagel-Hermes-34B-Slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4602720780861448
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5921903605860047
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3347315436241611
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46220833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4703291223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_Einstein-v4-7B/035c5e35-0ebe-4e91-a598-8d01688462a3.json b/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_Einstein-v4-7B/035c5e35-0ebe-4e91-a598-8d01688462a3.json
deleted file mode 100644
index dbfbe27a1723bba2e600b1e1518dc220b00a6052..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_Einstein-v4-7B/035c5e35-0ebe-4e91-a598-8d01688462a3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v4-7B/1762652579.948704",
- "retrieved_timestamp": "1762652579.948705",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Weyaxi/Einstein-v4-7B",
- "developer": "Weyaxi",
- "inference_platform": "unknown",
- "id": "Weyaxi/Einstein-v4-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47081299839980145
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38494699692741774
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0188821752265861
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28187919463087246
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4681666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22589760638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_SauerkrautLM-UNA-SOLAR-Instruct/8ddec5bb-ab90-4c98-8482-a412e7735246.json b/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_SauerkrautLM-UNA-SOLAR-Instruct/8ddec5bb-ab90-4c98-8482-a412e7735246.json
deleted file mode 100644
index 123ffa5e9fc3c51b02b1a44faf97fabef6dce32e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Weyaxi/Weyaxi_SauerkrautLM-UNA-SOLAR-Instruct/8ddec5bb-ab90-4c98-8482-a412e7735246.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Weyaxi_SauerkrautLM-UNA-SOLAR-Instruct/1762652579.950165",
- "retrieved_timestamp": "1762652579.950166",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct",
- "developer": "Weyaxi",
- "inference_platform": "unknown",
- "id": "Weyaxi/SauerkrautLM-UNA-SOLAR-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4573243438520902
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5166357112030591
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04607250755287009
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.397875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31532579787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.732
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/WizardLMTeam/WizardLMTeam_WizardLM-13B-V1.0/ab4f785b-779f-423b-9905-31a3b66dfeff.json b/leaderboard_data/HFOpenLLMv2/WizardLMTeam/WizardLMTeam_WizardLM-13B-V1.0/ab4f785b-779f-423b-9905-31a3b66dfeff.json
deleted file mode 100644
index 1c4d347f216945a8f62bfc4463701deec1855b14..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/WizardLMTeam/WizardLMTeam_WizardLM-13B-V1.0/ab4f785b-779f-423b-9905-31a3b66dfeff.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/WizardLMTeam_WizardLM-13B-V1.0/1762652579.9503958",
- "retrieved_timestamp": "1762652579.950397",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "WizardLMTeam/WizardLM-13B-V1.0",
- "developer": "WizardLMTeam",
- "inference_platform": "unknown",
- "id": "WizardLMTeam/WizardLM-13B-V1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18504900331121424
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29134447696551025
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34971875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11660571808510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 13.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/WizardLMTeam/WizardLMTeam_WizardLM-13B-V1.2/f9d2286c-ed89-4c23-b6a2-c623373331cd.json b/leaderboard_data/HFOpenLLMv2/WizardLMTeam/WizardLMTeam_WizardLM-13B-V1.2/f9d2286c-ed89-4c23-b6a2-c623373331cd.json
deleted file mode 100644
index 5e1bb1e528702255d017b1ca614e1447d5eb6f46..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/WizardLMTeam/WizardLMTeam_WizardLM-13B-V1.2/f9d2286c-ed89-4c23-b6a2-c623373331cd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/WizardLMTeam_WizardLM-13B-V1.2/1762652579.950676",
- "retrieved_timestamp": "1762652579.950676",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "WizardLMTeam/WizardLM-13B-V1.2",
- "developer": "WizardLMTeam",
- "inference_platform": "unknown",
- "id": "WizardLMTeam/WizardLM-13B-V1.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3392465325336773
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44619994364600474
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0188821752265861
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43784375000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25191156914893614
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 13.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/WizardLMTeam/WizardLMTeam_WizardLM-70B-V1.0/8c4ff628-41b6-4769-a33e-b1dbffa913cf.json b/leaderboard_data/HFOpenLLMv2/WizardLMTeam/WizardLMTeam_WizardLM-70B-V1.0/8c4ff628-41b6-4769-a33e-b1dbffa913cf.json
deleted file mode 100644
index 428262959a27860a9a47031c41828e5a8e76437b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/WizardLMTeam/WizardLMTeam_WizardLM-70B-V1.0/8c4ff628-41b6-4769-a33e-b1dbffa913cf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/WizardLMTeam_WizardLM-70B-V1.0/1762652579.950908",
- "retrieved_timestamp": "1762652579.950909",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "WizardLMTeam/WizardLM-70B-V1.0",
- "developer": "WizardLMTeam",
- "inference_platform": "unknown",
- "id": "WizardLMTeam/WizardLM-70B-V1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49514288753839814
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5590366047184262
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03927492447129909
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43911458333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34466422872340424
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Wladastic/Wladastic_Mini-Think-Base-1B/5f9a01b0-632a-4ee4-aedc-279002c7496c.json b/leaderboard_data/HFOpenLLMv2/Wladastic/Wladastic_Mini-Think-Base-1B/5f9a01b0-632a-4ee4-aedc-279002c7496c.json
deleted file mode 100644
index 740782842d2600496cca62108ce4047ff31d26e7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Wladastic/Wladastic_Mini-Think-Base-1B/5f9a01b0-632a-4ee4-aedc-279002c7496c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Wladastic_Mini-Think-Base-1B/1762652579.951128",
- "retrieved_timestamp": "1762652579.9511292",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Wladastic/Mini-Think-Base-1B",
- "developer": "Wladastic",
- "inference_platform": "unknown",
- "id": "Wladastic/Mini-Think-Base-1B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5588405430923283
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35741728048349203
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07326283987915408
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32748958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17719414893617022
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.236
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_Arcanum-12b/2d0a414f-1cf2-4ae3-951b-ed69d1ef883f.json b/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_Arcanum-12b/2d0a414f-1cf2-4ae3-951b-ed69d1ef883f.json
deleted file mode 100644
index 29414f527fa8fd67854ee7ec10b4d8dc7716d7a7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_Arcanum-12b/2d0a414f-1cf2-4ae3-951b-ed69d1ef883f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Xclbr7_Arcanum-12b/1762652579.9514",
- "retrieved_timestamp": "1762652579.951401",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Xclbr7/Arcanum-12b",
- "developer": "Xclbr7",
- "inference_platform": "unknown",
- "id": "Xclbr7/Arcanum-12b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2906864896253053
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5265359354118465
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11933534743202417
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41703124999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3586269946808511
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_Hyena-12b/06eb233f-5182-4b9e-be3f-21c928eef397.json b/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_Hyena-12b/06eb233f-5182-4b9e-be3f-21c928eef397.json
deleted file mode 100644
index 1a8023a2c07cccf18a43f5c129af40c6c3502f9f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_Hyena-12b/06eb233f-5182-4b9e-be3f-21c928eef397.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Xclbr7_Hyena-12b/1762652579.9516642",
- "retrieved_timestamp": "1762652579.951665",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Xclbr7/Hyena-12b",
- "developer": "Xclbr7",
- "inference_platform": "unknown",
- "id": "Xclbr7/Hyena-12b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3404455733010634
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5457182415468321
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11329305135951662
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39842708333333327
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3439162234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_caliburn-12b/e897d1fc-2c71-4c61-971b-eeddfae1b75c.json b/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_caliburn-12b/e897d1fc-2c71-4c61-971b-eeddfae1b75c.json
deleted file mode 100644
index f3932473329cfb72a067d4a3f021c88b892463f0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_caliburn-12b/e897d1fc-2c71-4c61-971b-eeddfae1b75c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Xclbr7_caliburn-12b/1762652579.951879",
- "retrieved_timestamp": "1762652579.95188",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Xclbr7/caliburn-12b",
- "developer": "Xclbr7",
- "inference_platform": "unknown",
- "id": "Xclbr7/caliburn-12b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35763108551975425
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5518630300231809
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11253776435045318
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33640939597315433
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4291875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36751994680851063
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_caliburn-v2-12b/18a12670-8785-44ef-a365-78ce797b8ba5.json b/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_caliburn-v2-12b/18a12670-8785-44ef-a365-78ce797b8ba5.json
deleted file mode 100644
index 600bec3b75539c693e86fdcb0f23594b4ccf1a42..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Xclbr7/Xclbr7_caliburn-v2-12b/18a12670-8785-44ef-a365-78ce797b8ba5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Xclbr7_caliburn-v2-12b/1762652579.952102",
- "retrieved_timestamp": "1762652579.952102",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Xclbr7/caliburn-v2-12b",
- "developer": "Xclbr7",
- "inference_platform": "unknown",
- "id": "Xclbr7/caliburn-v2-12b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2966816934622358
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5141426125097639
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10498489425981873
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3263422818791946
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43703125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37840757978723405
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Xiaojian9992024/Xiaojian9992024_Reflection-L3.2-JametMiniMix-3B/e582afbb-99f3-4b43-8ee7-b786680124a9.json b/leaderboard_data/HFOpenLLMv2/Xiaojian9992024/Xiaojian9992024_Reflection-L3.2-JametMiniMix-3B/e582afbb-99f3-4b43-8ee7-b786680124a9.json
deleted file mode 100644
index 54d8d917d6c5d2eb70023d9a6d751c42e9b07ae7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Xiaojian9992024/Xiaojian9992024_Reflection-L3.2-JametMiniMix-3B/e582afbb-99f3-4b43-8ee7-b786680124a9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Reflection-L3.2-JametMiniMix-3B/1762652579.9550028",
- "retrieved_timestamp": "1762652579.9550028",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B",
- "developer": "Xiaojian9992024",
- "inference_platform": "unknown",
- "id": "Xiaojian9992024/Reflection-L3.2-JametMiniMix-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46194541594081484
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4389528940684813
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11933534743202417
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36673958333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29878656914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Yash21/Yash21_TinyYi-7B-Test/d6a9abee-29ee-44e0-802c-c3e4354ebbac.json b/leaderboard_data/HFOpenLLMv2/Yash21/Yash21_TinyYi-7B-Test/d6a9abee-29ee-44e0-802c-c3e4354ebbac.json
deleted file mode 100644
index 5dc104502409511e1a670653f75c2315ff84b1d9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Yash21/Yash21_TinyYi-7B-Test/d6a9abee-29ee-44e0-802c-c3e4354ebbac.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Yash21_TinyYi-7B-Test/1762652579.960211",
- "retrieved_timestamp": "1762652579.960212",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Yash21/TinyYi-7B-Test",
- "developer": "Yash21",
- "inference_platform": "unknown",
- "id": "Yash21/TinyYi-7B-Test"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18564852369490728
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29098007801214715
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3364479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10912566489361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.061
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_1PARAMMYL-8B-ModelStock/87231cbd-d911-434d-991b-1eb373cdde4f.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_1PARAMMYL-8B-ModelStock/87231cbd-d911-434d-991b-1eb373cdde4f.json
deleted file mode 100644
index 8a862065f8f58aa7f711a519bccba1af27cdbdba..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_1PARAMMYL-8B-ModelStock/87231cbd-d911-434d-991b-1eb373cdde4f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Youlln_1PARAMMYL-8B-ModelStock/1762652579.9604638",
- "retrieved_timestamp": "1762652579.960465",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Youlln/1PARAMMYL-8B-ModelStock",
- "developer": "Youlln",
- "inference_platform": "unknown",
- "id": "Youlln/1PARAMMYL-8B-ModelStock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5371336941537344
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5215839663555125
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1487915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4409375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4000166223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_2PRYMMAL-Yi1.5-6B-SLERP/e80773ef-5ca2-43de-ba99-a7a997aab7f0.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_2PRYMMAL-Yi1.5-6B-SLERP/e80773ef-5ca2-43de-ba99-a7a997aab7f0.json
deleted file mode 100644
index 815b64a7071c38268a8e9cb1c1e3d81780240426..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_2PRYMMAL-Yi1.5-6B-SLERP/e80773ef-5ca2-43de-ba99-a7a997aab7f0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Youlln_2PRYMMAL-Yi1.5-6B-SLERP/1762652579.9607239",
- "retrieved_timestamp": "1762652579.960725",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Youlln/2PRYMMAL-Yi1.5-6B-SLERP",
- "developer": "Youlln",
- "inference_platform": "unknown",
- "id": "Youlln/2PRYMMAL-Yi1.5-6B-SLERP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28259351853083153
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46647504291710673
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11329305135951662
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47560416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3169880319148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.061
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-MIRAGE-1-12B/f3f55015-88c7-41ae-b588-9a1eedd56fc2.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-MIRAGE-1-12B/f3f55015-88c7-41ae-b588-9a1eedd56fc2.json
deleted file mode 100644
index 613e2b356a2c09a7c5e1193c5daca5bf1dae5282..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-MIRAGE-1-12B/f3f55015-88c7-41ae-b588-9a1eedd56fc2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Youlln_ECE-MIRAGE-1-12B/1762652579.96142",
- "retrieved_timestamp": "1762652579.96142",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Youlln/ECE-MIRAGE-1-12B",
- "developer": "Youlln",
- "inference_platform": "unknown",
- "id": "Youlln/ECE-MIRAGE-1-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20698081091503875
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30107140221306034
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3219375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11095412234042554
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 15.21
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-MIRAGE-1-15B/f904e587-76ac-4583-9235-fcdd20d9a626.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-MIRAGE-1-15B/f904e587-76ac-4583-9235-fcdd20d9a626.json
deleted file mode 100644
index 90178b7c642f741f56e29023e46474cd85c45bb3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-MIRAGE-1-15B/f904e587-76ac-4583-9235-fcdd20d9a626.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Youlln_ECE-MIRAGE-1-15B/1762652579.961622",
- "retrieved_timestamp": "1762652579.961622",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Youlln/ECE-MIRAGE-1-15B",
- "developer": "Youlln",
- "inference_platform": "unknown",
- "id": "Youlln/ECE-MIRAGE-1-15B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20698081091503875
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30107140221306034
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3219375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11095412234042554
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 15.21
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-FT-V3-MUSR/de30a84d-c8cc-4f3c-9eb4-3f58754dc46b.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-FT-V3-MUSR/de30a84d-c8cc-4f3c-9eb4-3f58754dc46b.json
deleted file mode 100644
index 2d73d146d01d7d396088233fe7ce080e73c5362f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-FT-V3-MUSR/de30a84d-c8cc-4f3c-9eb4-3f58754dc46b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-FT-V3-MUSR/1762652579.962029",
- "retrieved_timestamp": "1762652579.962029",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR",
- "developer": "Youlln",
- "inference_platform": "unknown",
- "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V3-MUSR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15334977858748122
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3041148294962408
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02416918429003021
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24916107382550334
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36603125000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1644780585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-FT-V3/45c46c5d-cf81-42d4-bf9e-61aca49b2959.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-FT-V3/45c46c5d-cf81-42d4-bf9e-61aca49b2959.json
deleted file mode 100644
index cc39e6fb51b96cd5ef4a4206752073ba314cc2d2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-FT-V3/45c46c5d-cf81-42d4-bf9e-61aca49b2959.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-FT-V3/1762652579.9618208",
- "retrieved_timestamp": "1762652579.9618208",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Youlln/ECE-PRYMMAL-0.5B-FT-V3",
- "developer": "Youlln",
- "inference_platform": "unknown",
- "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16419101317836673
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30931341134548046
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0030211480362537764
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3644479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11610704787234043
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-FT-V4-MUSR/68382b86-8a68-428e-8338-144a76b8c293.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-FT-V4-MUSR/68382b86-8a68-428e-8338-144a76b8c293.json
deleted file mode 100644
index 648059681d5e04792e7bdfc747d8a664a9d9bf09..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-FT-V4-MUSR/68382b86-8a68-428e-8338-144a76b8c293.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-FT-V4-MUSR/1762652579.9622452",
- "retrieved_timestamp": "1762652579.962246",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR",
- "developer": "Youlln",
- "inference_platform": "unknown",
- "id": "Youlln/ECE-PRYMMAL-0.5B-FT-V4-MUSR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1137570535069172
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3038362724383693
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.012084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3528854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13214760638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-SLERP-V2/c0fe65df-7e51-48ad-bf40-fd163804cad1.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-SLERP-V2/c0fe65df-7e51-48ad-bf40-fd163804cad1.json
deleted file mode 100644
index b59f85dba6b0f29c33b9bbab4df0fe1438802300..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-SLERP-V2/c0fe65df-7e51-48ad-bf40-fd163804cad1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-SLERP-V2/1762652579.962454",
- "retrieved_timestamp": "1762652579.962455",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V2",
- "developer": "Youlln",
- "inference_platform": "unknown",
- "id": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1611934112599015
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2934774313772131
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0007552870090634441
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3831145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10945811170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-SLERP-V3/d67c4d9a-d5cc-4b26-a439-44c87a299ee8.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-SLERP-V3/d67c4d9a-d5cc-4b26-a439-44c87a299ee8.json
deleted file mode 100644
index 9a2f9bc044145e43139df27ee3994713211c309e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-0.5B-SLERP-V3/d67c4d9a-d5cc-4b26-a439-44c87a299ee8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-0.5B-SLERP-V3/1762652579.9626722",
- "retrieved_timestamp": "1762652579.9626722",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V3",
- "developer": "Youlln",
- "inference_platform": "unknown",
- "id": "Youlln/ECE-PRYMMAL-0.5B-SLERP-V3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16701352411601217
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29383772587210827
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2516778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.354125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10871010638297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V1/70577ab1-a0ef-41f3-8d6a-00b0b873ee39.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V1/70577ab1-a0ef-41f3-8d6a-00b0b873ee39.json
deleted file mode 100644
index 08dadfd6cd7f2b8fdee565f1bc87208f48ba1d59..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V1/70577ab1-a0ef-41f3-8d6a-00b0b873ee39.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V1/1762652579.962892",
- "retrieved_timestamp": "1762652579.962893",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1",
- "developer": "Youlln",
- "inference_platform": "unknown",
- "id": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32510848991786234
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4208506248736219
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10725075528700906
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4265833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2935505319148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V2/6021f954-951a-47e1-980d-ce729f9f39b4.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V2/6021f954-951a-47e1-980d-ce729f9f39b4.json
deleted file mode 100644
index 0ead19a739c8eb42a7e71d0ad8c523491105fbe5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V2/6021f954-951a-47e1-980d-ce729f9f39b4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-YL-1B-SLERP-V2/1762652579.963118",
- "retrieved_timestamp": "1762652579.963118",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2",
- "developer": "Youlln",
- "inference_platform": "unknown",
- "id": "Youlln/ECE-PRYMMAL-YL-1B-SLERP-V2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32510848991786234
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4208506248736219
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10725075528700906
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4265833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2935505319148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-YL-7B-SLERP-V4/e027a39b-1213-42aa-b66f-b1853c644532.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-YL-7B-SLERP-V4/e027a39b-1213-42aa-b66f-b1853c644532.json
deleted file mode 100644
index b2eb1604e3f7960bd7f9467121d1cabceabee827..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL-YL-7B-SLERP-V4/e027a39b-1213-42aa-b66f-b1853c644532.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL-YL-7B-SLERP-V4/1762652579.963329",
- "retrieved_timestamp": "1762652579.963329",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4",
- "developer": "Youlln",
- "inference_platform": "unknown",
- "id": "Youlln/ECE-PRYMMAL-YL-7B-SLERP-V4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2509696494190969
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37697272812325017
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05362537764350453
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3744895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2131815159574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL0.5-FT/4264c0fc-9f40-4c27-b877-63a751678a1c.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL0.5-FT/4264c0fc-9f40-4c27-b877-63a751678a1c.json
deleted file mode 100644
index 4c460cf0befa2e2dcad168fd19eeb3a47d156564..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL0.5-FT/4264c0fc-9f40-4c27-b877-63a751678a1c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL0.5-FT/1762652579.963541",
- "retrieved_timestamp": "1762652579.963541",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Youlln/ECE-PRYMMAL0.5-FT",
- "developer": "Youlln",
- "inference_platform": "unknown",
- "id": "Youlln/ECE-PRYMMAL0.5-FT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18507338306803725
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31320911187036277
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.023413897280966767
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2558724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.330125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14768949468085107
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL0.5B-Youri/46564b0a-1489-4c98-9e7b-20daf58c2f87.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL0.5B-Youri/46564b0a-1489-4c98-9e7b-20daf58c2f87.json
deleted file mode 100644
index f12f1ead10a847a7d002ee7fefadc3e10c8bc25d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL0.5B-Youri/46564b0a-1489-4c98-9e7b-20daf58c2f87.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL0.5B-Youri/1762652579.963748",
- "retrieved_timestamp": "1762652579.9637492",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Youlln/ECE-PRYMMAL0.5B-Youri",
- "developer": "Youlln",
- "inference_platform": "unknown",
- "id": "Youlln/ECE-PRYMMAL0.5B-Youri"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1446317991817267
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28173574256265815
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24328859060402686
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36965625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10954122340425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL1B-FT-V1/c3a0b587-b379-4013-a5ce-26fdc9dcc44d.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL1B-FT-V1/c3a0b587-b379-4013-a5ce-26fdc9dcc44d.json
deleted file mode 100644
index 60d65c97df9b090daa0bc9bb49ee6e664fb3b8d6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-PRYMMAL1B-FT-V1/c3a0b587-b379-4013-a5ce-26fdc9dcc44d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Youlln_ECE-PRYMMAL1B-FT-V1/1762652579.963949",
- "retrieved_timestamp": "1762652579.9639502",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Youlln/ECE-PRYMMAL1B-FT-V1",
- "developer": "Youlln",
- "inference_platform": "unknown",
- "id": "Youlln/ECE-PRYMMAL1B-FT-V1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2143745262569981
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4032647427840684
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06419939577039276
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34165625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2742686170212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-Qwen0.5B-FT-V2/ee8952db-9f0a-4892-bff9-4d2ca1b66364.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-Qwen0.5B-FT-V2/ee8952db-9f0a-4892-bff9-4d2ca1b66364.json
deleted file mode 100644
index a650a0c2a0e2cb6c5373cfcc23668759f357c567..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE-Qwen0.5B-FT-V2/ee8952db-9f0a-4892-bff9-4d2ca1b66364.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Youlln_ECE-Qwen0.5B-FT-V2/1762652579.9641678",
- "retrieved_timestamp": "1762652579.964169",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Youlln/ECE-Qwen0.5B-FT-V2",
- "developer": "Youlln",
- "inference_platform": "unknown",
- "id": "Youlln/ECE-Qwen0.5B-FT-V2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25259311958935626
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.328970813623839
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02039274924471299
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30628125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16655585106382978
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE.EIFFEIL.ia-0.5B-SLERP/7a5fdffa-146b-43fd-a979-728c37ae599f.json b/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE.EIFFEIL.ia-0.5B-SLERP/7a5fdffa-146b-43fd-a979-728c37ae599f.json
deleted file mode 100644
index 8f573342110aefc26dd485c9bd2d87a71067e69a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Youlln/Youlln_ECE.EIFFEIL.ia-0.5B-SLERP/7a5fdffa-146b-43fd-a979-728c37ae599f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Youlln_ECE.EIFFEIL.ia-0.5B-SLERP/1762652579.964375",
- "retrieved_timestamp": "1762652579.964375",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Youlln/ECE.EIFFEIL.ia-0.5B-SLERP",
- "developer": "Youlln",
- "inference_platform": "unknown",
- "id": "Youlln/ECE.EIFFEIL.ia-0.5B-SLERP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2561403742071038
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33056720460862643
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05966767371601209
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31021875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1903257978723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Yuma42/Yuma42_KangalKhan-RawRuby-7B/4ad4a260-770a-4cce-9ba7-546cfa4cde58.json b/leaderboard_data/HFOpenLLMv2/Yuma42/Yuma42_KangalKhan-RawRuby-7B/4ad4a260-770a-4cce-9ba7-546cfa4cde58.json
deleted file mode 100644
index 19a58f89ee45c1028e47d22164b061a0fe76234b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Yuma42/Yuma42_KangalKhan-RawRuby-7B/4ad4a260-770a-4cce-9ba7-546cfa4cde58.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Yuma42_KangalKhan-RawRuby-7B/1762652579.9648829",
- "retrieved_timestamp": "1762652579.964884",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Yuma42/KangalKhan-RawRuby-7B",
- "developer": "Yuma42",
- "inference_platform": "unknown",
- "id": "Yuma42/KangalKhan-RawRuby-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.547674614467391
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47547278683676025
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39495833333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30227726063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/Z1-Coder/Z1-Coder_Z1-Coder-7B/750b35ad-fdf6-4243-91e7-aee90f84fa5b.json b/leaderboard_data/HFOpenLLMv2/Z1-Coder/Z1-Coder_Z1-Coder-7B/750b35ad-fdf6-4243-91e7-aee90f84fa5b.json
deleted file mode 100644
index fd0ddf8c0b4261a6f997530cf4128fde6e863895..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/Z1-Coder/Z1-Coder_Z1-Coder-7B/750b35ad-fdf6-4243-91e7-aee90f84fa5b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Z1-Coder_Z1-Coder-7B/1762652579.9655669",
- "retrieved_timestamp": "1762652579.965568",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Z1-Coder/Z1-Coder-7B",
- "developer": "Z1-Coder",
- "inference_platform": "unknown",
- "id": "Z1-Coder/Z1-Coder-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3215113676157041
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48418251218099567
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.324773413897281
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36215625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37591422872340424
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ZHLiu627/ZHLiu627_zephyr-7b-gemma-dpo-avg/856a1f50-7ffb-4eb1-be4a-8aaa3cd6ee66.json b/leaderboard_data/HFOpenLLMv2/ZHLiu627/ZHLiu627_zephyr-7b-gemma-dpo-avg/856a1f50-7ffb-4eb1-be4a-8aaa3cd6ee66.json
deleted file mode 100644
index 145bb23cdda7834d9b41ed30a06f70416fd17d03..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ZHLiu627/ZHLiu627_zephyr-7b-gemma-dpo-avg/856a1f50-7ffb-4eb1-be4a-8aaa3cd6ee66.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ZHLiu627_zephyr-7b-gemma-dpo-avg/1762652579.9658082",
- "retrieved_timestamp": "1762652579.9658089",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ZHLiu627/zephyr-7b-gemma-dpo-avg",
- "developer": "ZHLiu627",
- "inference_platform": "unknown",
- "id": "ZHLiu627/zephyr-7b-gemma-dpo-avg"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30899679517014855
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41488227982365095
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.045317220543806644
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4107083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28507313829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 8.538
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ZeroXClem/ZeroXClem_L3-Aspire-Heart-Matrix-8B/e6d8d952-5a3d-4a97-860c-8275b10c6516.json b/leaderboard_data/HFOpenLLMv2/ZeroXClem/ZeroXClem_L3-Aspire-Heart-Matrix-8B/e6d8d952-5a3d-4a97-860c-8275b10c6516.json
deleted file mode 100644
index 97f2127f564f58e56033393a7a565e4e56e0837c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ZeroXClem/ZeroXClem_L3-Aspire-Heart-Matrix-8B/e6d8d952-5a3d-4a97-860c-8275b10c6516.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ZeroXClem_L3-Aspire-Heart-Matrix-8B/1762652579.96632",
- "retrieved_timestamp": "1762652579.966321",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ZeroXClem/L3-Aspire-Heart-Matrix-8B",
- "developer": "ZeroXClem",
- "inference_platform": "unknown",
- "id": "ZeroXClem/L3-Aspire-Heart-Matrix-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48335305877294465
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5384211938486898
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18277945619335348
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4187083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3784906914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ZeusLabs/ZeusLabs_L3-Aethora-15B-V2/0e9ed58c-1a3e-49b4-8013-994642a95920.json b/leaderboard_data/HFOpenLLMv2/ZeusLabs/ZeusLabs_L3-Aethora-15B-V2/0e9ed58c-1a3e-49b4-8013-994642a95920.json
deleted file mode 100644
index e57d28d679d010d2afa523862bc1e0ce19c55a23..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ZeusLabs/ZeusLabs_L3-Aethora-15B-V2/0e9ed58c-1a3e-49b4-8013-994642a95920.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ZeusLabs_L3-Aethora-15B-V2/1762652579.968798",
- "retrieved_timestamp": "1762652579.9687989",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ZeusLabs/L3-Aethora-15B-V2",
- "developer": "ZeusLabs",
- "inference_platform": "unknown",
- "id": "ZeusLabs/L3-Aethora-15B-V2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7208063493752133
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5010910465463698
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08081570996978851
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3870833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3499833776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 15.01
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ZhangShenao/ZhangShenao_SELM-Llama-3-8B-Instruct-iter-3/6bf4063b-44aa-4809-a400-5406abe5eb2e.json b/leaderboard_data/HFOpenLLMv2/ZhangShenao/ZhangShenao_SELM-Llama-3-8B-Instruct-iter-3/6bf4063b-44aa-4809-a400-5406abe5eb2e.json
deleted file mode 100644
index 16acba86f9c00dea694a78b3c241899e81339eaf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ZhangShenao/ZhangShenao_SELM-Llama-3-8B-Instruct-iter-3/6bf4063b-44aa-4809-a400-5406abe5eb2e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ZhangShenao_SELM-Llama-3-8B-Instruct-iter-3/1762652579.9690418",
- "retrieved_timestamp": "1762652579.969043",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3",
- "developer": "ZhangShenao",
- "inference_platform": "unknown",
- "id": "ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6902817856620433
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5046089390770511
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08610271903323263
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38451041666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3783244680851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Dracarys-72B-Instruct/2f1e6f4e-86e6-47a4-96e6-3bc2b330cd3a.json b/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Dracarys-72B-Instruct/2f1e6f4e-86e6-47a4-96e6-3bc2b330cd3a.json
deleted file mode 100644
index eb9544f7138bbb06c5b26a71146a3628ed86a879..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Dracarys-72B-Instruct/2f1e6f4e-86e6-47a4-96e6-3bc2b330cd3a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/abacusai_Dracarys-72B-Instruct/1762652579.969532",
- "retrieved_timestamp": "1762652579.969532",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "abacusai/Dracarys-72B-Instruct",
- "developer": "abacusai",
- "inference_platform": "unknown",
- "id": "abacusai/Dracarys-72B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7855778224001206
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6944066392084981
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39652567975830816
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39093959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4558229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5456283244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-34B-v0.1/e0b9044d-1b87-44f7-b59b-88d790f429e5.json b/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-34B-v0.1/e0b9044d-1b87-44f7-b59b-88d790f429e5.json
deleted file mode 100644
index d6c386a5287771a967b5bf89aa321357cb0892b8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-34B-v0.1/e0b9044d-1b87-44f7-b59b-88d790f429e5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/abacusai_Smaug-34B-v0.1/1762652579.970392",
- "retrieved_timestamp": "1762652579.9703932",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "abacusai/Smaug-34B-v0.1",
- "developer": "abacusai",
- "inference_platform": "unknown",
- "id": "abacusai/Smaug-34B-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5015625207782018
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5357785983493821
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07175226586102719
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3296979865771812
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.397875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4542885638297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-72B-v0.1/a3b08cd3-6ead-4db0-92ed-212c6b0e45ee.json b/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-72B-v0.1/a3b08cd3-6ead-4db0-92ed-212c6b0e45ee.json
deleted file mode 100644
index e64036cc2af5f462846f88fe8ba44f0952276bd6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-72B-v0.1/a3b08cd3-6ead-4db0-92ed-212c6b0e45ee.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/abacusai_Smaug-72B-v0.1/1762652579.970887",
- "retrieved_timestamp": "1762652579.9708889",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "abacusai/Smaug-72B-v0.1",
- "developer": "abacusai",
- "inference_platform": "unknown",
- "id": "abacusai/Smaug-72B-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5167001334237601
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5995632330786429
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19108761329305135
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4473229166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4623503989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 72.289
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-Llama-3-70B-Instruct-32K/962b4977-63f0-4a87-a36e-f3e592b74761.json b/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-Llama-3-70B-Instruct-32K/962b4977-63f0-4a87-a36e-f3e592b74761.json
deleted file mode 100644
index b0f2994fbc4d959f2e669a8f0092e4f3519b8b90..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-Llama-3-70B-Instruct-32K/962b4977-63f0-4a87-a36e-f3e592b74761.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/abacusai_Smaug-Llama-3-70B-Instruct-32K/1762652579.971162",
- "retrieved_timestamp": "1762652579.9711628",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "abacusai/Smaug-Llama-3-70B-Instruct-32K",
- "developer": "abacusai",
- "inference_platform": "unknown",
- "id": "abacusai/Smaug-Llama-3-70B-Instruct-32K"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7761107195574409
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6493108088828602
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27492447129909364
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4207916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47647938829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-Mixtral-v0.1/ba0fe822-7a57-4ccb-a97e-e852a59d9ae1.json b/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-Mixtral-v0.1/ba0fe822-7a57-4ccb-a97e-e852a59d9ae1.json
deleted file mode 100644
index 9ab3a95b6601887f31659ab5f36405bc7e75ea0a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-Mixtral-v0.1/ba0fe822-7a57-4ccb-a97e-e852a59d9ae1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/abacusai_Smaug-Mixtral-v0.1/1762652579.971408",
- "retrieved_timestamp": "1762652579.9714088",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "abacusai/Smaug-Mixtral-v0.1",
- "developer": "abacusai",
- "inference_platform": "unknown",
- "id": "abacusai/Smaug-Mixtral-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5554428915278129
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5162245602454115
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09516616314199396
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4298125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3351894946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 46.703
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-Qwen2-72B-Instruct/84695a6b-dc11-448c-bbeb-b3cc05cde7ba.json b/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-Qwen2-72B-Instruct/84695a6b-dc11-448c-bbeb-b3cc05cde7ba.json
deleted file mode 100644
index d158e05e84632c7ed96344f9a17c4ca537bc8ecc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_Smaug-Qwen2-72B-Instruct/84695a6b-dc11-448c-bbeb-b3cc05cde7ba.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/abacusai_Smaug-Qwen2-72B-Instruct/1762652579.9716392",
- "retrieved_timestamp": "1762652579.97164",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "abacusai/Smaug-Qwen2-72B-Instruct",
- "developer": "abacusai",
- "inference_platform": "unknown",
- "id": "abacusai/Smaug-Qwen2-72B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7825303527972447
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6909789934583822
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4131419939577039
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3615771812080537
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44007291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.519032579787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_bigstral-12b-32k/aed1ac03-5364-477e-ab8f-68b599170128.json b/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_bigstral-12b-32k/aed1ac03-5364-477e-ab8f-68b599170128.json
deleted file mode 100644
index c719c1240d953e9a23c70874b944843f3150539d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_bigstral-12b-32k/aed1ac03-5364-477e-ab8f-68b599170128.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/abacusai_bigstral-12b-32k/1762652579.971883",
- "retrieved_timestamp": "1762652579.971884",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "abacusai/bigstral-12b-32k",
- "developer": "abacusai",
- "inference_platform": "unknown",
- "id": "abacusai/bigstral-12b-32k"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41938057686937324
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4700122314782882
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015105740181268883
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45597916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26412898936170215
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.476
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_bigyi-15b/19b4d65c-39c7-4b81-bb71-f166ab4f9490.json b/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_bigyi-15b/19b4d65c-39c7-4b81-bb71-f166ab4f9490.json
deleted file mode 100644
index 9469eaf24a2618e32d5a8d40fbc10e72c07f170c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/abacusai/abacusai_bigyi-15b/19b4d65c-39c7-4b81-bb71-f166ab4f9490.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/abacusai_bigyi-15b/1762652579.972117",
- "retrieved_timestamp": "1762652579.972117",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "abacusai/bigyi-15b",
- "developer": "abacusai",
- "inference_platform": "unknown",
- "id": "abacusai/bigyi-15b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20940327220663396
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4345298820215116
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02945619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35378125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30028257978723405
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 15.058
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/abhishek/abhishek_autotrain-0tmgq-5tpbg/b5707c22-a2a2-4787-a902-b72945ebccd9.json b/leaderboard_data/HFOpenLLMv2/abhishek/abhishek_autotrain-0tmgq-5tpbg/b5707c22-a2a2-4787-a902-b72945ebccd9.json
deleted file mode 100644
index 1e269ffdb909bee611e1ac70b2931a4cf5ceab52..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/abhishek/abhishek_autotrain-0tmgq-5tpbg/b5707c22-a2a2-4787-a902-b72945ebccd9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/abhishek_autotrain-0tmgq-5tpbg/1762652579.972783",
- "retrieved_timestamp": "1762652579.972784",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "abhishek/autotrain-0tmgq-5tpbg",
- "developer": "abhishek",
- "inference_platform": "unknown",
- "id": "abhishek/autotrain-0tmgq-5tpbg"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19516549422199764
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3127326480314375
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01283987915407855
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35837499999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11436170212765957
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/abhishek/abhishek_autotrain-0tmgq-5tpbg/ddd32642-ed7a-41b8-974a-f85b7f04d0db.json b/leaderboard_data/HFOpenLLMv2/abhishek/abhishek_autotrain-0tmgq-5tpbg/ddd32642-ed7a-41b8-974a-f85b7f04d0db.json
deleted file mode 100644
index 5b9d4bd846596750a6059827fc595492aa194983..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/abhishek/abhishek_autotrain-0tmgq-5tpbg/ddd32642-ed7a-41b8-974a-f85b7f04d0db.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/abhishek_autotrain-0tmgq-5tpbg/1762652579.972393",
- "retrieved_timestamp": "1762652579.972395",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "abhishek/autotrain-0tmgq-5tpbg",
- "developer": "abhishek",
- "inference_platform": "unknown",
- "id": "abhishek/autotrain-0tmgq-5tpbg"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19571514692127998
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3134513987945074
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2516778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36504166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11510970744680851
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/abhishek/abhishek_autotrain-vr4a1-e5mms/e1462a5a-d120-4c0f-ba13-fbecb18619a0.json b/leaderboard_data/HFOpenLLMv2/abhishek/abhishek_autotrain-vr4a1-e5mms/e1462a5a-d120-4c0f-ba13-fbecb18619a0.json
deleted file mode 100644
index 99960a026e163d243184977ec0e809ee6a8d69d4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/abhishek/abhishek_autotrain-vr4a1-e5mms/e1462a5a-d120-4c0f-ba13-fbecb18619a0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/abhishek_autotrain-vr4a1-e5mms/1762652579.973708",
- "retrieved_timestamp": "1762652579.973709",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "abhishek/autotrain-vr4a1-e5mms",
- "developer": "abhishek",
- "inference_platform": "unknown",
- "id": "abhishek/autotrain-vr4a1-e5mms"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21422492320376602
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5000624442873264
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14123867069486404
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.389125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36668882978723405
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "?",
- "params_billions": 16.061
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/adamo1139/adamo1139_Yi-34B-200K-AEZAKMI-v2/a28de361-e90d-44f7-b609-e4d64ae1be6f.json b/leaderboard_data/HFOpenLLMv2/adamo1139/adamo1139_Yi-34B-200K-AEZAKMI-v2/a28de361-e90d-44f7-b609-e4d64ae1be6f.json
deleted file mode 100644
index 5a30331a4cb961d53ba27b12c7ca5816677d25a9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/adamo1139/adamo1139_Yi-34B-200K-AEZAKMI-v2/a28de361-e90d-44f7-b609-e4d64ae1be6f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/adamo1139_Yi-34B-200K-AEZAKMI-v2/1762652579.974368",
- "retrieved_timestamp": "1762652579.974369",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "adamo1139/Yi-34B-200K-AEZAKMI-v2",
- "developer": "adamo1139",
- "inference_platform": "unknown",
- "id": "adamo1139/Yi-34B-200K-AEZAKMI-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4555257827010111
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5383819237015192
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05664652567975831
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33221476510067116
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38860416666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4512965425531915
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/aevalone/aevalone_distill_qw_test/108ead60-3cee-43e7-925a-619bace5b65f.json b/leaderboard_data/HFOpenLLMv2/aevalone/aevalone_distill_qw_test/108ead60-3cee-43e7-925a-619bace5b65f.json
deleted file mode 100644
index 5c8f47cfe0750cb4b47ea1ceec635719458a792a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/aevalone/aevalone_distill_qw_test/108ead60-3cee-43e7-925a-619bace5b65f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/aevalone_distill_qw_test/1762652579.975426",
- "retrieved_timestamp": "1762652579.9754272",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "aevalone/distill_qw_test",
- "developer": "aevalone",
- "inference_platform": "unknown",
- "id": "aevalone/distill_qw_test"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.740889728143548
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5245748734435777
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4780966767371601
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38596874999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4091589095744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/agentlans/agentlans_Llama-3.2-1B-Instruct-CrashCourse12K/fbedd898-b839-49c1-bd6d-3a8744d4138a.json b/leaderboard_data/HFOpenLLMv2/agentlans/agentlans_Llama-3.2-1B-Instruct-CrashCourse12K/fbedd898-b839-49c1-bd6d-3a8744d4138a.json
deleted file mode 100644
index 7a1a41bedb5a4841f57762820b229fc19383e0c8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/agentlans/agentlans_Llama-3.2-1B-Instruct-CrashCourse12K/fbedd898-b839-49c1-bd6d-3a8744d4138a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/agentlans_Llama-3.2-1B-Instruct-CrashCourse12K/1762652579.976028",
- "retrieved_timestamp": "1762652579.976029",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "agentlans/Llama-3.2-1B-Instruct-CrashCourse12K",
- "developer": "agentlans",
- "inference_platform": "unknown",
- "id": "agentlans/Llama-3.2-1B-Instruct-CrashCourse12K"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5395062877609188
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35481032861183426
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07099697885196375
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2407718120805369
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32104166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1809341755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.236
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/agentlans/agentlans_Llama3.1-Daredevilish-Instruct/7a6d7a66-5772-4793-9597-ef0225b63f30.json b/leaderboard_data/HFOpenLLMv2/agentlans/agentlans_Llama3.1-Daredevilish-Instruct/7a6d7a66-5772-4793-9597-ef0225b63f30.json
deleted file mode 100644
index 0026724efc0ddb6ca0bb89d5dba41e1c4b63d486..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/agentlans/agentlans_Llama3.1-Daredevilish-Instruct/7a6d7a66-5772-4793-9597-ef0225b63f30.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/agentlans_Llama3.1-Daredevilish-Instruct/1762652579.9768262",
- "retrieved_timestamp": "1762652579.976827",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "agentlans/Llama3.1-Daredevilish-Instruct",
- "developer": "agentlans",
- "inference_platform": "unknown",
- "id": "agentlans/Llama3.1-Daredevilish-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7925969760236173
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5235442557198345
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17220543806646527
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3910833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3877160904255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/agentlans/agentlans_Qwen2.5-0.5B-Instruct-CrashCourse-dropout/ad130d6f-6a5e-447a-a1ee-bfa2d93e5336.json b/leaderboard_data/HFOpenLLMv2/agentlans/agentlans_Qwen2.5-0.5B-Instruct-CrashCourse-dropout/ad130d6f-6a5e-447a-a1ee-bfa2d93e5336.json
deleted file mode 100644
index f394063013e4754b57ea25517b42aaf23a1f843a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/agentlans/agentlans_Qwen2.5-0.5B-Instruct-CrashCourse-dropout/ad130d6f-6a5e-447a-a1ee-bfa2d93e5336.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/agentlans_Qwen2.5-0.5B-Instruct-CrashCourse-dropout/1762652579.9778361",
- "retrieved_timestamp": "1762652579.977837",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout",
- "developer": "agentlans",
- "inference_platform": "unknown",
- "id": "agentlans/Qwen2.5-0.5B-Instruct-CrashCourse-dropout"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2948831323111566
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3311726760218689
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04229607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3341875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16082114361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ai21labs/ai21labs_Jamba-v0.1/e9546f28-0f6b-449e-a2b3-c6ab262103cc.json b/leaderboard_data/HFOpenLLMv2/ai21labs/ai21labs_Jamba-v0.1/e9546f28-0f6b-449e-a2b3-c6ab262103cc.json
deleted file mode 100644
index 489d7fd080b4b7bc71db71906b1a475f818fac20..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ai21labs/ai21labs_Jamba-v0.1/e9546f28-0f6b-449e-a2b3-c6ab262103cc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ai21labs_Jamba-v0.1/1762652579.978585",
- "retrieved_timestamp": "1762652579.978585",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ai21labs/Jamba-v0.1",
- "developer": "ai21labs",
- "inference_platform": "unknown",
- "id": "ai21labs/Jamba-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20255920956395698
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36022602451645724
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015861027190332326
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35902083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24916888297872342
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "JambaForCausalLM",
- "params_billions": 51.57
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ai4bharat/ai4bharat_Airavata/350b0559-6331-4b8b-82e2-0463baea9d8a.json b/leaderboard_data/HFOpenLLMv2/ai4bharat/ai4bharat_Airavata/350b0559-6331-4b8b-82e2-0463baea9d8a.json
deleted file mode 100644
index 35ff3e5b00d09f03861477b77e867bd1e274e28f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ai4bharat/ai4bharat_Airavata/350b0559-6331-4b8b-82e2-0463baea9d8a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ai4bharat_Airavata/1762652579.978861",
- "retrieved_timestamp": "1762652579.978862",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ai4bharat/Airavata",
- "developer": "ai4bharat",
- "inference_platform": "unknown",
- "id": "ai4bharat/Airavata"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05585402288150995
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36276862514633795
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01812688821752266
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3762916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1634807180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.87
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/aixonlab/aixonlab_Aether-12b/831b6f81-1552-4a7b-acac-eb927001e440.json b/leaderboard_data/HFOpenLLMv2/aixonlab/aixonlab_Aether-12b/831b6f81-1552-4a7b-acac-eb927001e440.json
deleted file mode 100644
index ff491bec3c35b6995328f768aac5b5e96c863071..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/aixonlab/aixonlab_Aether-12b/831b6f81-1552-4a7b-acac-eb927001e440.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/aixonlab_Aether-12b/1762652579.979132",
- "retrieved_timestamp": "1762652579.979133",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "aixonlab/Aether-12b",
- "developer": "aixonlab",
- "inference_platform": "unknown",
- "id": "aixonlab/Aether-12b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23468286369056326
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5179400750435481
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10649546827794562
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38286458333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3410073138297872
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/aixonlab/aixonlab_Grey-12b/2c4626c7-3016-4641-9862-0ba4f7f7936c.json b/leaderboard_data/HFOpenLLMv2/aixonlab/aixonlab_Grey-12b/2c4626c7-3016-4641-9862-0ba4f7f7936c.json
deleted file mode 100644
index 9ea3b7a6c92848457c603578b5a8986f01addccb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/aixonlab/aixonlab_Grey-12b/2c4626c7-3016-4641-9862-0ba4f7f7936c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/aixonlab_Grey-12b/1762652579.979384",
- "retrieved_timestamp": "1762652579.9793851",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "aixonlab/Grey-12b",
- "developer": "aixonlab",
- "inference_platform": "unknown",
- "id": "aixonlab/Grey-12b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39679938119744496
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5698957505959833
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09818731117824774
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4516354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3779089095744681
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/aixonlab/aixonlab_Zara-14b-v1.2/a4c3ddcb-482c-47fb-9290-3c0678b38fb4.json b/leaderboard_data/HFOpenLLMv2/aixonlab/aixonlab_Zara-14b-v1.2/a4c3ddcb-482c-47fb-9290-3c0678b38fb4.json
deleted file mode 100644
index bff3ca2b03bfb20eb1b0253e54b530110a3fec17..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/aixonlab/aixonlab_Zara-14b-v1.2/a4c3ddcb-482c-47fb-9290-3c0678b38fb4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/aixonlab_Zara-14b-v1.2/1762652579.979647",
- "retrieved_timestamp": "1762652579.979647",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "aixonlab/Zara-14b-v1.2",
- "developer": "aixonlab",
- "inference_platform": "unknown",
- "id": "aixonlab/Zara-14b-v1.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6197400674654362
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6405368457456163
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35347432024169184
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38171140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46747916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5263464095744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/1-800-LLMs_Qwen-2.5-14B-Hindi/21ba6052-9614-454e-999d-ef4f0f693c6c.json b/leaderboard_data/HFOpenLLMv2/alibaba/1-800-LLMs_Qwen-2.5-14B-Hindi/21ba6052-9614-454e-999d-ef4f0f693c6c.json
deleted file mode 100644
index c2dc37ec641b072aea799f861eead8b0011f591e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/1-800-LLMs_Qwen-2.5-14B-Hindi/21ba6052-9614-454e-999d-ef4f0f693c6c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/1-800-LLMs_Qwen-2.5-14B-Hindi/1762652579.467683",
- "retrieved_timestamp": "1762652579.4676852",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "1-800-LLMs/Qwen-2.5-14B-Hindi",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "1-800-LLMs/Qwen-2.5-14B-Hindi"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.582570911847232
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6523901531956199
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3330815709969788
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3624161073825503
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4489375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5262632978723404
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/1024m_QWEN-14B-B100/745bd077-3a0f-4c06-8d19-d7c160512446.json b/leaderboard_data/HFOpenLLMv2/alibaba/1024m_QWEN-14B-B100/745bd077-3a0f-4c06-8d19-d7c160512446.json
deleted file mode 100644
index 4b46bbafc4cd8c44c3bc32b7708cea48f814f7ec..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/1024m_QWEN-14B-B100/745bd077-3a0f-4c06-8d19-d7c160512446.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/1024m_QWEN-14B-B100/1762652579.468843",
- "retrieved_timestamp": "1762652579.4688451",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "1024m/QWEN-14B-B100",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "1024m/QWEN-14B-B100"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7762104549262623
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.653271132679638
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5438066465256798
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35067114093959734
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5178690159574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Aashraf995_Qwen-Evo-7B/705ae322-fed9-4a98-a79e-e0b289065ba9.json b/leaderboard_data/HFOpenLLMv2/alibaba/Aashraf995_Qwen-Evo-7B/705ae322-fed9-4a98-a79e-e0b289065ba9.json
deleted file mode 100644
index 51d57e54d9ec3b303586c94b35d76b9cb916274a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Aashraf995_Qwen-Evo-7B/705ae322-fed9-4a98-a79e-e0b289065ba9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Aashraf995_Qwen-Evo-7B/1762652579.4765608",
- "retrieved_timestamp": "1762652579.476562",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Aashraf995/Qwen-Evo-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Aashraf995/Qwen-Evo-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4757343847657549
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5709361538590277
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31419939577039274
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32550335570469796
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4541458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44622672872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Aashraf995_QwenStock-14B/7888b813-8ef1-4367-8168-edd1bd3c7888.json b/leaderboard_data/HFOpenLLMv2/alibaba/Aashraf995_QwenStock-14B/7888b813-8ef1-4367-8168-edd1bd3c7888.json
deleted file mode 100644
index 953f8a0fc0caec8d400adf6fa4a01dbbf8720be8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Aashraf995_QwenStock-14B/7888b813-8ef1-4367-8168-edd1bd3c7888.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Aashraf995_QwenStock-14B/1762652579.476816",
- "retrieved_timestamp": "1762652579.476817",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Aashraf995/QwenStock-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Aashraf995/QwenStock-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5008632650256873
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6550130348108012
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35725075528700906
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38926174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4792604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5382313829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Alsebay_Qwen2.5-7B-test-novelist/19ff3120-2171-48b3-8db6-1c76bb57cf47.json b/leaderboard_data/HFOpenLLMv2/alibaba/Alsebay_Qwen2.5-7B-test-novelist/19ff3120-2171-48b3-8db6-1c76bb57cf47.json
deleted file mode 100644
index 863c018c78cef0dbba314bca52d08622eee0fe0b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Alsebay_Qwen2.5-7B-test-novelist/19ff3120-2171-48b3-8db6-1c76bb57cf47.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Alsebay_Qwen2.5-7B-test-novelist/1762652579.479883",
- "retrieved_timestamp": "1762652579.4798841",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Alsebay/Qwen2.5-7B-test-novelist",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Alsebay/Qwen2.5-7B-test-novelist"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5351600420218354
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.515121518446605
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2348942598187311
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47488541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3865525265957447
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Aryanne_QwentileSwap/ee2c5dd9-09db-45fa-8e67-961993d30672.json b/leaderboard_data/HFOpenLLMv2/alibaba/Aryanne_QwentileSwap/ee2c5dd9-09db-45fa-8e67-961993d30672.json
deleted file mode 100644
index 30f9c1793be9b6689c2d90ca62bd6a7f865d5642..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Aryanne_QwentileSwap/ee2c5dd9-09db-45fa-8e67-961993d30672.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Aryanne_QwentileSwap/1762652579.4827101",
- "retrieved_timestamp": "1762652579.482711",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Aryanne/QwentileSwap",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Aryanne/QwentileSwap"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7378422585406721
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7008370136278447
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42220543806646527
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3674496644295302
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4640416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5945811170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/AtAndDev_Qwen2.5-1.5B-continuous-learnt/1a2d8396-4ff1-4386-a76b-d4863c7736c5.json b/leaderboard_data/HFOpenLLMv2/alibaba/AtAndDev_Qwen2.5-1.5B-continuous-learnt/1a2d8396-4ff1-4386-a76b-d4863c7736c5.json
deleted file mode 100644
index ea911d3f094da527dad8bb2ab4e518b2f970145a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/AtAndDev_Qwen2.5-1.5B-continuous-learnt/1a2d8396-4ff1-4386-a76b-d4863c7736c5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AtAndDev_Qwen2.5-1.5B-continuous-learnt/1762652579.483878",
- "retrieved_timestamp": "1762652579.4838789",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AtAndDev/Qwen2.5-1.5B-continuous-learnt",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "AtAndDev/Qwen2.5-1.5B-continuous-learnt"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45105431366551857
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42746984992662185
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1472809667673716
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36228124999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28058510638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/AtAndDev_Qwen2.5-1.5B-continuous-learnt/4f7f368f-0646-4c16-80de-69d9c5e28193.json b/leaderboard_data/HFOpenLLMv2/alibaba/AtAndDev_Qwen2.5-1.5B-continuous-learnt/4f7f368f-0646-4c16-80de-69d9c5e28193.json
deleted file mode 100644
index 1203840e510f0aeb1ff16a66cd6e8aa9c993b04c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/AtAndDev_Qwen2.5-1.5B-continuous-learnt/4f7f368f-0646-4c16-80de-69d9c5e28193.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AtAndDev_Qwen2.5-1.5B-continuous-learnt/1762652579.483521",
- "retrieved_timestamp": "1762652579.483522",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AtAndDev/Qwen2.5-1.5B-continuous-learnt",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "AtAndDev/Qwen2.5-1.5B-continuous-learnt"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4605214165081982
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42577470857933336
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07477341389728097
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3636458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28116688829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/dcd14b21-f2fd-4c10-bf83-b6bb946f2789.json b/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/dcd14b21-f2fd-4c10-bf83-b6bb946f2789.json
deleted file mode 100644
index f5b275fdb36732723c11c3c33cb1f1fb58699f6c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/dcd14b21-f2fd-4c10-bf83-b6bb946f2789.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CombinHorizon_Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES/1762652579.508495",
- "retrieved_timestamp": "1762652579.5084958",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CombinHorizon/Josiefied-abliteratedV4-Qwen2.5-14B-Inst-BaseMerge-TIES"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8239958864701216
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6370093752306357
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5317220543806647
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42603125000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4979222074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/3171e54f-4c6f-40cf-ba6c-ef23b803ca33.json b/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/3171e54f-4c6f-40cf-ba6c-ef23b803ca33.json
deleted file mode 100644
index aefa35b7cd101d89a60c73b2f667448e0d5443bf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/3171e54f-4c6f-40cf-ba6c-ef23b803ca33.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CombinHorizon_Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES/1762652579.508758",
- "retrieved_timestamp": "1762652579.508759",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CombinHorizon/Rombos-Qwen2.5-7B-Inst-BaseMerge-TIES"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7564019025075688
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5402085849577634
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.493202416918429
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40330208333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4341755319148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/62faed28-8f0f-4ff8-894f-b4b5b754b4cf.json b/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/62faed28-8f0f-4ff8-894f-b4b5b754b4cf.json
deleted file mode 100644
index b282f2f20a1ba617e25723a18a2e782780f9027c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/62faed28-8f0f-4ff8-894f-b4b5b754b4cf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CombinHorizon_huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES/1762652579.509247",
- "retrieved_timestamp": "1762652579.509248",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CombinHorizon/huihui-ai-abliterated-Qwen2.5-32B-Inst-BaseMerge-TIES"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8206237228331937
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.692924708291253
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5944108761329305
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3389261744966443
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42072916666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5720578457446809
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/62b4c918-b33b-40cf-888b-42b116a9e04d.json b/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/62b4c918-b33b-40cf-888b-42b116a9e04d.json
deleted file mode 100644
index d9b653064f708af24431ef18971d444433110e8f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/62b4c918-b33b-40cf-888b-42b116a9e04d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CombinHorizon_huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES/1762652579.509461",
- "retrieved_timestamp": "1762652579.509462",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CombinHorizon/huihui-ai-abliteratedV2-Qwen2.5-14B-Inst-BaseMerge-TIES"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8175762532303177
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6335891556421077
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.547583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145973154362416
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42603125000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4910239361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/3bf71784-e6f1-405b-ad23-e74a91df7051.json b/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/3bf71784-e6f1-405b-ad23-e74a91df7051.json
deleted file mode 100644
index ef98e5850da33a438c55eb71f7382efffb4f7eed..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CombinHorizon_zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/3bf71784-e6f1-405b-ad23-e74a91df7051.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CombinHorizon_zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES/1762652579.509675",
- "retrieved_timestamp": "1762652579.509676",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CombinHorizon/zetasepic-abliteratedV2-Qwen2.5-32B-Inst-BaseMerge-TIES"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8328136012446974
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6955174427138592
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5853474320241692
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3674496644295302
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43139583333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5684840425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CoolSpring_Qwen2-0.5B-Abyme-merge2/2121d736-eec6-4a86-bae0-cd032f9eb603.json b/leaderboard_data/HFOpenLLMv2/alibaba/CoolSpring_Qwen2-0.5B-Abyme-merge2/2121d736-eec6-4a86-bae0-cd032f9eb603.json
deleted file mode 100644
index a5fa40c3c1a2ce84722d5210dfd624d1427a00d9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CoolSpring_Qwen2-0.5B-Abyme-merge2/2121d736-eec6-4a86-bae0-cd032f9eb603.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CoolSpring_Qwen2-0.5B-Abyme-merge2/1762652579.511093",
- "retrieved_timestamp": "1762652579.511094",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CoolSpring/Qwen2-0.5B-Abyme-merge2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CoolSpring/Qwen2-0.5B-Abyme-merge2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2021846478454944
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29942723009138733
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03323262839879154
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3687291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14893617021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CoolSpring_Qwen2-0.5B-Abyme-merge3/2a633e8b-b35a-4a26-83bb-b471bab18ed2.json b/leaderboard_data/HFOpenLLMv2/alibaba/CoolSpring_Qwen2-0.5B-Abyme-merge3/2a633e8b-b35a-4a26-83bb-b471bab18ed2.json
deleted file mode 100644
index d1dfaaaee072fc7e5a29aaac3382c6986c60859d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CoolSpring_Qwen2-0.5B-Abyme-merge3/2a633e8b-b35a-4a26-83bb-b471bab18ed2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CoolSpring_Qwen2-0.5B-Abyme-merge3/1762652579.51142",
- "retrieved_timestamp": "1762652579.511421",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CoolSpring/Qwen2-0.5B-Abyme-merge3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CoolSpring/Qwen2-0.5B-Abyme-merge3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23860468002677343
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30031404525933675
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03172205438066465
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35009375000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15001662234042554
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CoolSpring_Qwen2-0.5B-Abyme/46d2afd2-b620-4474-ac6c-4f6bdef93d1c.json b/leaderboard_data/HFOpenLLMv2/alibaba/CoolSpring_Qwen2-0.5B-Abyme/46d2afd2-b620-4474-ac6c-4f6bdef93d1c.json
deleted file mode 100644
index 52bff1e7717a523fc97345e9a9ca7e0e55791c8d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CoolSpring_Qwen2-0.5B-Abyme/46d2afd2-b620-4474-ac6c-4f6bdef93d1c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CoolSpring_Qwen2-0.5B-Abyme/1762652579.5106628",
- "retrieved_timestamp": "1762652579.510665",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CoolSpring/Qwen2-0.5B-Abyme",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CoolSpring/Qwen2-0.5B-Abyme"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19151850423542865
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2861834296481826
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02945619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35421875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13331117021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Broca/4429613e-2db7-4061-931f-eaa70d202b71.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Broca/4429613e-2db7-4061-931f-eaa70d202b71.json
deleted file mode 100644
index 11ab7889547abb3d6a7d2bf2b77e5ff1d40a7a82..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Broca/4429613e-2db7-4061-931f-eaa70d202b71.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Broca/1762652579.5150259",
- "retrieved_timestamp": "1762652579.5150259",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-Broca",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-Broca"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.560414145578177
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6527145981540362
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3580060422960725
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38674496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47665625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5364029255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-BrocaV9/782219f0-25f7-465b-9f86-5e48c9d4703e.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-BrocaV9/782219f0-25f7-465b-9f86-5e48c9d4703e.json
deleted file mode 100644
index 82a610c8f73f051341931e3992dc6dbefa5b42d4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-BrocaV9/782219f0-25f7-465b-9f86-5e48c9d4703e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-BrocaV9/1762652579.515307",
- "retrieved_timestamp": "1762652579.5153081",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-BrocaV9",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-BrocaV9"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6762933460994606
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6391383585238984
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3814199395770393
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3640939597315436
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46903125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5330784574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Brocav3/7abe4912-4e21-4774-8011-482603f7bcc0.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Brocav3/7abe4912-4e21-4774-8011-482603f7bcc0.json
deleted file mode 100644
index e30b9b3168207856815c04e97e4d5a3b5bcfd333..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Brocav3/7abe4912-4e21-4774-8011-482603f7bcc0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Brocav3/1762652579.5155342",
- "retrieved_timestamp": "1762652579.515535",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-Brocav3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-Brocav3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6951776841004091
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6452353476182755
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38746223564954685
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35906040268456374
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4756354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.531748670212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Brocav6/63a1000f-1de8-42ef-a905-70b78bf46417.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Brocav6/63a1000f-1de8-42ef-a905-70b78bf46417.json
deleted file mode 100644
index 04c33b1bb956a797981fa725ddb7520dc6ec942a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Brocav6/63a1000f-1de8-42ef-a905-70b78bf46417.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Brocav6/1762652579.515748",
- "retrieved_timestamp": "1762652579.5157492",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-Brocav6",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-Brocav6"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6995239298394925
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6388835266626555
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38746223564954685
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3674496644295302
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47420833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5319148936170213
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Brocav7/6966d397-d336-455a-a156-c2e6430c813f.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Brocav7/6966d397-d336-455a-a156-c2e6430c813f.json
deleted file mode 100644
index 01716e63ee6c34fa241c6b87b653eac6fb26f680..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Brocav7/6966d397-d336-455a-a156-c2e6430c813f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Brocav7/1762652579.5159612",
- "retrieved_timestamp": "1762652579.5159621",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-Brocav7",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-Brocav7"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6723715297632504
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6444026981327182
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38444108761329304
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3674496644295302
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47960416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5257646276595744
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Emerged/15af5216-fc3d-4102-bbed-eb5b7d0ecf48.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Emerged/15af5216-fc3d-4102-bbed-eb5b7d0ecf48.json
deleted file mode 100644
index 9fdbeaac28195a51178f51d735e32fdc4e693d33..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Emerged/15af5216-fc3d-4102-bbed-eb5b7d0ecf48.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Emerged/1762652579.516177",
- "retrieved_timestamp": "1762652579.516178",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-Emerged",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-Emerged"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7000237148543642
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6260033680703311
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.324773413897281
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3573825503355705
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46909375000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5186170212765957
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Emergedv3/7b125482-fd80-4f71-b398-9421333ee736.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Emergedv3/7b125482-fd80-4f71-b398-9421333ee736.json
deleted file mode 100644
index 113bc690821f647d26091e36cd69a6cd60750efc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Emergedv3/7b125482-fd80-4f71-b398-9421333ee736.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Emergedv3/1762652579.516385",
- "retrieved_timestamp": "1762652579.516386",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-Emergedv3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-Emergedv3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6388493641316153
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6190728411056029
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43580060422960726
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36073825503355705
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4728125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5173703457446809
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-FinalMerge/36ebe0b7-51ae-4ea5-ba42-c9fd0d717259.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-FinalMerge/36ebe0b7-51ae-4ea5-ba42-c9fd0d717259.json
deleted file mode 100644
index 57e08c13fb0d62d5383b3cddc685d703e4a2687b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-FinalMerge/36ebe0b7-51ae-4ea5-ba42-c9fd0d717259.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-FinalMerge/1762652579.516642",
- "retrieved_timestamp": "1762652579.516643",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-FinalMerge",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-FinalMerge"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48909781601705693
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5714945310011449
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3814199395770393
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3548657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43790625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4574468085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyper/8412921a-ad8c-4106-a3a1-9259d2ddb074.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyper/8412921a-ad8c-4106-a3a1-9259d2ddb074.json
deleted file mode 100644
index be5b29aafe59125efbbda2c8873bfa845f8bd3a5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyper/8412921a-ad8c-4106-a3a1-9259d2ddb074.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyper/1762652579.516851",
- "retrieved_timestamp": "1762652579.516851",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-Hyper",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-Hyper"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5391317260424563
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6507453346766106
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34365558912386707
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39177852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48983333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5374002659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-HyperMarck-dl/5b6ef372-86e5-4fc1-85ba-5a76517bb10f.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-HyperMarck-dl/5b6ef372-86e5-4fc1-85ba-5a76517bb10f.json
deleted file mode 100644
index 27f04b8e84e0c8b24953436e65e4b1a1f124599d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-HyperMarck-dl/5b6ef372-86e5-4fc1-85ba-5a76517bb10f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-HyperMarck-dl/1762652579.5170581",
- "retrieved_timestamp": "1762652579.517059",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-HyperMarck-dl",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-HyperMarck-dl"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6650276821057017
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6096480033153927
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5287009063444109
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3674496644295302
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4415625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5090591755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyperionv3/d6700ad3-d858-4420-96b1-d690984ebcaa.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyperionv3/d6700ad3-d858-4420-96b1-d690984ebcaa.json
deleted file mode 100644
index bc6ba41ada3a449021e436b1ac914add63ffe954..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyperionv3/d6700ad3-d858-4420-96b1-d690984ebcaa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyperionv3/1762652579.517266",
- "retrieved_timestamp": "1762652579.517267",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-Hyperionv3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-Hyperionv3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6836371937570092
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6522165609411941
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37009063444108764
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37080536912751677
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47296875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5339926861702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyperionv4/7c4a43f8-be43-44d7-a514-f02b70ec367c.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyperionv4/7c4a43f8-be43-44d7-a514-f02b70ec367c.json
deleted file mode 100644
index ff7c00a8f6b759095aa991d173a5baf3604f68b1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyperionv4/7c4a43f8-be43-44d7-a514-f02b70ec367c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyperionv4/1762652579.517484",
- "retrieved_timestamp": "1762652579.517484",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-Hyperionv4",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-Hyperionv4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5415796752616391
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6471791978856551
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3474320241691843
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3976510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48319791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5364029255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyperionv5/5b1e2a5e-cd92-4ad4-b12d-0540461f9f5e.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyperionv5/5b1e2a5e-cd92-4ad4-b12d-0540461f9f5e.json
deleted file mode 100644
index 9d9d6d63e8c8cf9b24ac117d236e28a2b5c91b53..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Hyperionv5/5b1e2a5e-cd92-4ad4-b12d-0540461f9f5e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Hyperionv5/1762652579.517704",
- "retrieved_timestamp": "1762652579.517704",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-Hyperionv5",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-Hyperionv5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6729211824625327
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.644265785086055
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3821752265861027
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3716442953020134
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4795416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5301695478723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-MegaMerge-pt2/f269bb45-d627-49b9-953b-5c8591433aa7.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-MegaMerge-pt2/f269bb45-d627-49b9-953b-5c8591433aa7.json
deleted file mode 100644
index 529ec4744b2edacf9f005a090473b93f47810a65..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-MegaMerge-pt2/f269bb45-d627-49b9-953b-5c8591433aa7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-MegaMerge-pt2/1762652579.517905",
- "retrieved_timestamp": "1762652579.517906",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-MegaMerge-pt2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-MegaMerge-pt2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.568307645935008
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6577703330510146
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3995468277945619
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37919463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.472875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5420545212765957
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-MergeStock/c1db0f86-a3d9-4aa4-9fe3-0442fc63ad25.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-MergeStock/c1db0f86-a3d9-4aa4-9fe3-0442fc63ad25.json
deleted file mode 100644
index e9c57e443454838d92033092cb2fc25e11d1a316..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-MergeStock/c1db0f86-a3d9-4aa4-9fe3-0442fc63ad25.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-MergeStock/1762652579.518343",
- "retrieved_timestamp": "1762652579.518346",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-MergeStock",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-MergeStock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5685326046002386
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6579336391923106
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41465256797583083
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3733221476510067
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4676354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.539561170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-ReasoningMerge/df6199fa-3797-4b88-b5fc-e429f513932b.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-ReasoningMerge/df6199fa-3797-4b88-b5fc-e429f513932b.json
deleted file mode 100644
index 9d854e9923cc0570a403c1a3ef34d5b8f8d9ada0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-ReasoningMerge/df6199fa-3797-4b88-b5fc-e429f513932b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-ReasoningMerge/1762652579.518682",
- "retrieved_timestamp": "1762652579.518684",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-ReasoningMerge",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-ReasoningMerge"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46054690443578594
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6578226399295218
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.520392749244713
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4077181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5165937500000001
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5344913563829787
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Ultimav2/b76ac8f6-7355-4bbf-ad8f-d8fc967120a1.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Ultimav2/b76ac8f6-7355-4bbf-ad8f-d8fc967120a1.json
deleted file mode 100644
index b7a0723ea885480e7e572d0bccb471e307efa81a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Ultimav2/b76ac8f6-7355-4bbf-ad8f-d8fc967120a1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Ultimav2/1762652579.519061",
- "retrieved_timestamp": "1762652579.5190778",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-Ultimav2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-Ultimav2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5500228283177524
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6555027486976712
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38444108761329304
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3850671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4965625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5417220744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Unity/efd5d269-fc83-43f0-9054-dc3bdf40f180.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Unity/efd5d269-fc83-43f0-9054-dc3bdf40f180.json
deleted file mode 100644
index 8c4503dbd6727b8c3c6c9580e18efcf7f2924342..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Unity/efd5d269-fc83-43f0-9054-dc3bdf40f180.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Unity/1762652579.519516",
- "retrieved_timestamp": "1762652579.519517",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-Unity",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-Unity"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6738952645646883
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6019955540977778
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4312688821752266
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34731543624161076
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4679479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.507563164893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Wernicke-SLERP/8359ce66-d904-4092-92be-5e2dbb372677.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Wernicke-SLERP/8359ce66-d904-4092-92be-5e2dbb372677.json
deleted file mode 100644
index a9b8408a1b0ce7b94cd8ca17ff37941f4e02b95d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Wernicke-SLERP/8359ce66-d904-4092-92be-5e2dbb372677.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernicke-SLERP/1762652579.5203562",
- "retrieved_timestamp": "1762652579.5203571",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-Wernicke-SLERP",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-Wernicke-SLERP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5588904107767391
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6440929009604598
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4486404833836858
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34395973154362414
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41403125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5093916223404256
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.491
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Wernicke/6c2287bb-69b0-4b23-ba15-ff4a600e4aa7.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Wernicke/6c2287bb-69b0-4b23-ba15-ff4a600e4aa7.json
deleted file mode 100644
index 9e3cbd3e3c04117efcd809c680c2378378bd3307..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Wernicke/6c2287bb-69b0-4b23-ba15-ff4a600e4aa7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernicke/1762652579.519787",
- "retrieved_timestamp": "1762652579.519788",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-Wernicke",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-Wernicke"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5234699486252034
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6568359662501574
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3814199395770393
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3934563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46890625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5423869680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Wernickev3/a4f5037a-381b-4726-b90d-ba559058772c.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Wernickev3/a4f5037a-381b-4726-b90d-ba559058772c.json
deleted file mode 100644
index dc17ed671192c241f17539d4526a5597e23afeb8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-Wernickev3/a4f5037a-381b-4726-b90d-ba559058772c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-Wernickev3/1762652579.520611",
- "retrieved_timestamp": "1762652579.520612",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-Wernickev3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-Wernickev3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7048198779239085
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6184146992839421
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3542296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3624161073825503
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4716666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.515126329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-partialmergept1/852ffa19-285b-4037-ac60-63f24cafcecb.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-partialmergept1/852ffa19-285b-4037-ac60-63f24cafcecb.json
deleted file mode 100644
index 684385f0a1c8ff2c063f20a1d84f18b9603eb6af..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwen2.5-14B-partialmergept1/852ffa19-285b-4037-ac60-63f24cafcecb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwen2.5-14B-partialmergept1/1762652579.5208588",
- "retrieved_timestamp": "1762652579.52086",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwen2.5-14B-partialmergept1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwen2.5-14B-partialmergept1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.633728507028019
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6151178406213536
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45392749244712993
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3615771812080537
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47569791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5207779255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwenfinity-2.5-14B/4fba9290-886e-490d-aaeb-068f8c679006.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwenfinity-2.5-14B/4fba9290-886e-490d-aaeb-068f8c679006.json
deleted file mode 100644
index c1626f23694327f36297eb01b481de8b53fe7c9a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_Qwenfinity-2.5-14B/4fba9290-886e-490d-aaeb-068f8c679006.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_Qwenfinity-2.5-14B/1762652579.521086",
- "retrieved_timestamp": "1762652579.521087",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/Qwenfinity-2.5-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/Qwenfinity-2.5-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4813794066410457
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5655007271970033
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41012084592145015
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.348993288590604
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45058333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4498005319148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B-EvolMerge/44823eb6-717b-4508-a745-7821545dd3c2.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B-EvolMerge/44823eb6-717b-4508-a745-7821545dd3c2.json
deleted file mode 100644
index f68077ce16968c35782d06081ae53f5b32e7cba8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B-EvolMerge/44823eb6-717b-4508-a745-7821545dd3c2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B-EvolMerge/1762652579.5218382",
- "retrieved_timestamp": "1762652579.5218382",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/SeQwence-14B-EvolMerge",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/SeQwence-14B-EvolMerge"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5381576439403006
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6572183434723883
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36706948640483383
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48208333333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5418882978723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B-EvolMergev1/e2621a1f-af39-48fe-a56b-18e9b396a476.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B-EvolMergev1/e2621a1f-af39-48fe-a56b-18e9b396a476.json
deleted file mode 100644
index 0a738c856254aaad82e5c96223da8dc00c15b64d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B-EvolMergev1/e2621a1f-af39-48fe-a56b-18e9b396a476.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B-EvolMergev1/1762652579.5221288",
- "retrieved_timestamp": "1762652579.52213",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/SeQwence-14B-EvolMergev1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/SeQwence-14B-EvolMergev1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5554683794554005
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6545547382762975
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4214501510574018
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3766778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46227083333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.539311835106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B-v5/6a7ae44e-93f6-4371-b3a6-585a099aa7c7.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B-v5/6a7ae44e-93f6-4371-b3a6-585a099aa7c7.json
deleted file mode 100644
index 4c6ed82a3cd3f43270c51121b287e7fb304f84af..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B-v5/6a7ae44e-93f6-4371-b3a6-585a099aa7c7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B-v5/1762652579.522369",
- "retrieved_timestamp": "1762652579.522369",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/SeQwence-14B-v5",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/SeQwence-14B-v5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5919881470055011
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6517093605796943
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33081570996978854
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3699664429530201
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47141666666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5414727393617021
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B/b9f3e9d1-e1f9-44cd-9067-c949adfbe553.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B/b9f3e9d1-e1f9-44cd-9067-c949adfbe553.json
deleted file mode 100644
index b4de2b6ae4b58c606f7d1f625e471a78dbea58e2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14B/b9f3e9d1-e1f9-44cd-9067-c949adfbe553.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14B/1762652579.521544",
- "retrieved_timestamp": "1762652579.521545",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/SeQwence-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/SeQwence-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5351600420218354
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6505665291288972
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35347432024169184
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36073825503355705
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46661458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5418882978723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14Bv1/f4505219-fc0d-4f7b-ad71-3c9fef064c28.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14Bv1/f4505219-fc0d-4f7b-ad71-3c9fef064c28.json
deleted file mode 100644
index b6ba23a9eda9baccaa9fcaeb0a6dc9fdaf7d318b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14Bv1/f4505219-fc0d-4f7b-ad71-3c9fef064c28.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14Bv1/1762652579.522592",
- "retrieved_timestamp": "1762652579.522593",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/SeQwence-14Bv1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/SeQwence-14Bv1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6678003253589365
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6344673727103446
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3610271903323263
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3615771812080537
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47042708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.531998005319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14Bv2/49eccc70-6321-451b-87e9-29907cfb53a0.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14Bv2/49eccc70-6321-451b-87e9-29907cfb53a0.json
deleted file mode 100644
index a5480649904b02d556880d6e10a57028402d626e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14Bv2/49eccc70-6321-451b-87e9-29907cfb53a0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14Bv2/1762652579.5228019",
- "retrieved_timestamp": "1762652579.5228028",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/SeQwence-14Bv2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/SeQwence-14Bv2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5785992278266112
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6304512627108576
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47583081570996977
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36073825503355705
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4601041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5334109042553191
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14Bv3/4857c00b-e4fb-417a-8b63-a5b7e9298b40.json b/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14Bv3/4857c00b-e4fb-417a-8b63-a5b7e9298b40.json
deleted file mode 100644
index 40dab72e5ee5d6ee553fc59e2bf76273e620b7e5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/CultriX_SeQwence-14Bv3/4857c00b-e4fb-417a-8b63-a5b7e9298b40.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/CultriX_SeQwence-14Bv3/1762652579.523057",
- "retrieved_timestamp": "1762652579.523058",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "CultriX/SeQwence-14Bv3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "CultriX/SeQwence-14Bv3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5719047682371663
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6302253848409948
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47658610271903323
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3649328859060403
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4624270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5334940159574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Danielbrdz_Barcenas-R1-Qwen-1.5b/c5330fb2-e914-4170-81f8-77a317ba557c.json b/leaderboard_data/HFOpenLLMv2/alibaba/Danielbrdz_Barcenas-R1-Qwen-1.5b/c5330fb2-e914-4170-81f8-77a317ba557c.json
deleted file mode 100644
index ff582637b172f869f08fbbda3a70923d9da406c3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Danielbrdz_Barcenas-R1-Qwen-1.5b/c5330fb2-e914-4170-81f8-77a317ba557c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Danielbrdz_Barcenas-R1-Qwen-1.5b/1762652579.5346482",
- "retrieved_timestamp": "1762652579.5346491",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Danielbrdz/Barcenas-R1-Qwen-1.5b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Danielbrdz/Barcenas-R1-Qwen-1.5b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24280132271262472
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35872011187392944
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3496978851963746
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.354125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19090757978723405
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/4b7dd9db-5e94-4885-96f8-189af8d97c09.json b/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/4b7dd9db-5e94-4885-96f8-189af8d97c09.json
deleted file mode 100644
index dc0d69f0e043ef213fecf3494e14d6354be0054f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/4b7dd9db-5e94-4885-96f8-189af8d97c09.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavidAU_DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm/1762652579.53886",
- "retrieved_timestamp": "1762652579.53886",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "DavidAU/DeepSeek-R1-Distill-Qwen-25.5B-Brainstorm"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34159474638403875
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.580689592371853
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5536253776435045
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3859060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5155104166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4623503989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 25.506
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/78e7f7ee-3677-499a-aa36-2e8bf0902bf0.json b/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/78e7f7ee-3677-499a-aa36-2e8bf0902bf0.json
deleted file mode 100644
index ada50a45c87b2e2ef703b61964733ddf315a3a74..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/78e7f7ee-3677-499a-aa36-2e8bf0902bf0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavidAU_Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B/1762652579.543009",
- "retrieved_timestamp": "1762652579.543009",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "DavidAU/Qwen2.5-MOE-2X1.5B-DeepSeek-Uncensored-Censored-4B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17832905579418165
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30326053640004424
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.024924471299093656
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3714583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11419547872340426
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2MoeForCausalLM",
- "params_billions": 4.089
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/d65793ba-f363-4665-9ff5-1ac08e819d55.json b/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/d65793ba-f363-4665-9ff5-1ac08e819d55.json
deleted file mode 100644
index 4d97c25e292922c64855527ac1f6db04a9a68612..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/d65793ba-f363-4665-9ff5-1ac08e819d55.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavidAU_Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B/1762652579.543224",
- "retrieved_timestamp": "1762652579.543225",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "DavidAU/Qwen2.5-MOE-2X7B-DeepSeek-Abliterated-Censored-19B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28351773294857646
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35922718767499157
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24169184290030213
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38469791666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1636469414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2MoeForCausalLM",
- "params_billions": 19.022
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/c142222c-836d-493f-a9f8-857426e0573c.json b/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/c142222c-836d-493f-a9f8-857426e0573c.json
deleted file mode 100644
index 9aab308d43490656c08480d6c239f93cd0a0176f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/DavidAU_Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/c142222c-836d-493f-a9f8-857426e0573c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavidAU_Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32/1762652579.543571",
- "retrieved_timestamp": "1762652579.543573",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "DavidAU/Qwen2.5-MOE-6x1.5B-DeepSeek-Reasoning-e32"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21067766858601844
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32861776640637924
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24748322147651006
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3404479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11220079787234043
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2MoeForCausalLM",
- "params_billions": 8.714
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita/6669c8b8-91d6-4f14-8cfb-a6422352850d.json b/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita/6669c8b8-91d6-4f14-8cfb-a6422352850d.json
deleted file mode 100644
index 147df673e846b6411acd147f42388119f2398687..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita/6669c8b8-91d6-4f14-8cfb-a6422352850d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita/1762652579.5521228",
- "retrieved_timestamp": "1762652579.5521238",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DeepMount00/Qwen2-1.5B-Ita",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "DeepMount00/Qwen2-1.5B-Ita"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5173495214918638
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39805765159128703
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11404833836858005
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35037500000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2771775265957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v2/78ec8596-ee15-4e94-8bc8-77c6bdffc541.json b/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v2/78ec8596-ee15-4e94-8bc8-77c6bdffc541.json
deleted file mode 100644
index b91ff2f79ef453be7fda26332f5b7882384e8783..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v2/78ec8596-ee15-4e94-8bc8-77c6bdffc541.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v2/1762652579.552372",
- "retrieved_timestamp": "1762652579.552373",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DeepMount00/Qwen2-1.5B-Ita_v2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "DeepMount00/Qwen2-1.5B-Ita_v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49998891829235315
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3953827803974795
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09667673716012085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37018749999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30319148936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v3/f9cac378-3bdb-4c66-8193-502773c5c5eb.json b/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v3/f9cac378-3bdb-4c66-8193-502773c5c5eb.json
deleted file mode 100644
index 0b93ed10a83cec8b544558d5d8e2edeb416cf615..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v3/f9cac378-3bdb-4c66-8193-502773c5c5eb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v3/1762652579.552576",
- "retrieved_timestamp": "1762652579.552577",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DeepMount00/Qwen2-1.5B-Ita_v3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "DeepMount00/Qwen2-1.5B-Ita_v3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4890479483326463
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3948478837209111
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37415624999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3017785904255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v5/04f0529b-474c-42d2-99a8-e3bdd5c18eaf.json b/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v5/04f0529b-474c-42d2-99a8-e3bdd5c18eaf.json
deleted file mode 100644
index 3846d0a27748dc6c967053bbd52fb4463f4bbccb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v5/04f0529b-474c-42d2-99a8-e3bdd5c18eaf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v5/1762652579.552789",
- "retrieved_timestamp": "1762652579.55279",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DeepMount00/Qwen2-1.5B-Ita_v5",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "DeepMount00/Qwen2-1.5B-Ita_v5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4987400098405564
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40320443289745417
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11782477341389729
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25419463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34225
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29429853723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v6/041f6e95-b7d1-44c6-a995-0c8257e188aa.json b/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v6/041f6e95-b7d1-44c6-a995-0c8257e188aa.json
deleted file mode 100644
index d415525f251dd9f7f6dcadbd5d2faab79516b851..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/DeepMount00_Qwen2-1.5B-Ita_v6/041f6e95-b7d1-44c6-a995-0c8257e188aa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DeepMount00_Qwen2-1.5B-Ita_v6/1762652579.553008",
- "retrieved_timestamp": "1762652579.5530088",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DeepMount00/Qwen2-1.5B-Ita_v6",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "DeepMount00/Qwen2-1.5B-Ita_v6"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29990425404593146
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42486081646897506
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08459214501510574
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3754583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28715093085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.497
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Dongwei_DeepSeek-R1-Distill-Qwen-7B-GRPO/b36b915f-3c4a-40e8-ab78-8442dbe116e1.json b/leaderboard_data/HFOpenLLMv2/alibaba/Dongwei_DeepSeek-R1-Distill-Qwen-7B-GRPO/b36b915f-3c4a-40e8-ab78-8442dbe116e1.json
deleted file mode 100644
index 12c33828d63a82263a25f549025758c3e0f7db68..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Dongwei_DeepSeek-R1-Distill-Qwen-7B-GRPO/b36b915f-3c4a-40e8-ab78-8442dbe116e1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Dongwei_DeepSeek-R1-Distill-Qwen-7B-GRPO/1762652579.5556989",
- "retrieved_timestamp": "1762652579.5557",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Dongwei/DeepSeek-R1-Distill-Qwen-7B-GRPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40376866713653103
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34425676981862185
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19561933534743203
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36628124999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23221409574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/EVA-UNIT-01_EVA-Qwen2.5-14B-v0.2/3ba36700-5019-4525-bf5e-6a87cce7ecc5.json b/leaderboard_data/HFOpenLLMv2/alibaba/EVA-UNIT-01_EVA-Qwen2.5-14B-v0.2/3ba36700-5019-4525-bf5e-6a87cce7ecc5.json
deleted file mode 100644
index 2ca33d6a71157d9c2db160dc89ae0d9ae37c9652..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/EVA-UNIT-01_EVA-Qwen2.5-14B-v0.2/3ba36700-5019-4525-bf5e-6a87cce7ecc5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EVA-UNIT-01_EVA-Qwen2.5-14B-v0.2/1762652579.5920892",
- "retrieved_timestamp": "1762652579.5920892",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4038429145777648
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6090237540046592
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3406344410876133
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39429530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4794479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5135472074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/EVA-UNIT-01_EVA-Qwen2.5-72B-v0.2/9e315ba7-3eea-4934-822e-461e64bf8551.json b/leaderboard_data/HFOpenLLMv2/alibaba/EVA-UNIT-01_EVA-Qwen2.5-72B-v0.2/9e315ba7-3eea-4934-822e-461e64bf8551.json
deleted file mode 100644
index 24f5f73cfb135a235de30e5aac54298ab9396623..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/EVA-UNIT-01_EVA-Qwen2.5-72B-v0.2/9e315ba7-3eea-4934-822e-461e64bf8551.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EVA-UNIT-01_EVA-Qwen2.5-72B-v0.2/1762652579.59233",
- "retrieved_timestamp": "1762652579.592331",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "EVA-UNIT-01/EVA-Qwen2.5-72B-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6878837041272712
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7088012228048761
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4312688821752266
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4085570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47197916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.581283244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Etherll_Qwen2.5-7B-della-test/777b5587-70b2-472f-a6e4-820d653669cd.json b/leaderboard_data/HFOpenLLMv2/alibaba/Etherll_Qwen2.5-7B-della-test/777b5587-70b2-472f-a6e4-820d653669cd.json
deleted file mode 100644
index 839c471d162576f57fb6bef5ceab5dd35e64456f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Etherll_Qwen2.5-7B-della-test/777b5587-70b2-472f-a6e4-820d653669cd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Etherll_Qwen2.5-7B-della-test/1762652579.614594",
- "retrieved_timestamp": "1762652579.6145952",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Etherll/Qwen2.5-7B-della-test",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Etherll/Qwen2.5-7B-della-test"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7624968417133207
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5447331985391859
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48942598187311176
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40469791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4360871010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/HPAI-BSC_Qwen2.5-Aloe-Beta-7B/a99dbb21-4f7d-4ac0-b403-2f8bf7aa92b1.json b/leaderboard_data/HFOpenLLMv2/alibaba/HPAI-BSC_Qwen2.5-Aloe-Beta-7B/a99dbb21-4f7d-4ac0-b403-2f8bf7aa92b1.json
deleted file mode 100644
index 1b8712285cf63029893c7a77d1b7b39e688056a9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/HPAI-BSC_Qwen2.5-Aloe-Beta-7B/a99dbb21-4f7d-4ac0-b403-2f8bf7aa92b1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HPAI-BSC_Qwen2.5-Aloe-Beta-7B/1762652579.6368651",
- "retrieved_timestamp": "1762652579.636866",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HPAI-BSC/Qwen2.5-Aloe-Beta-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "HPAI-BSC/Qwen2.5-Aloe-Beta-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4553506917201914
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5048995904321122
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3542296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42603125000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4354222074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/HeraiHench_DeepSeek-R1-Qwen-Coder-8B/a0730f18-1058-44b4-b6b6-0881ae2e6338.json b/leaderboard_data/HFOpenLLMv2/alibaba/HeraiHench_DeepSeek-R1-Qwen-Coder-8B/a0730f18-1058-44b4-b6b6-0881ae2e6338.json
deleted file mode 100644
index 0beb66054c47ea1b2b43db80daca926ada38b318..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/HeraiHench_DeepSeek-R1-Qwen-Coder-8B/a0730f18-1058-44b4-b6b6-0881ae2e6338.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HeraiHench_DeepSeek-R1-Qwen-Coder-8B/1762652579.6392472",
- "retrieved_timestamp": "1762652579.639248",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HeraiHench/DeepSeek-R1-Qwen-Coder-8B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "HeraiHench/DeepSeek-R1-Qwen-Coder-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1869472998311148
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29134447696551025
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37384375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11228390957446809
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 8.164
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/HeraiHench_Double-Down-Qwen-Math-7B/6e852e78-e666-413e-ac29-ad374bbc74f2.json b/leaderboard_data/HFOpenLLMv2/alibaba/HeraiHench_Double-Down-Qwen-Math-7B/6e852e78-e666-413e-ac29-ad374bbc74f2.json
deleted file mode 100644
index 785cc16c5a55f21577327808a464fd7e9c3d43b2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/HeraiHench_Double-Down-Qwen-Math-7B/6e852e78-e666-413e-ac29-ad374bbc74f2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HeraiHench_Double-Down-Qwen-Math-7B/1762652579.63955",
- "retrieved_timestamp": "1762652579.639551",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HeraiHench/Double-Down-Qwen-Math-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "HeraiHench/Double-Down-Qwen-Math-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1669636564316015
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2844613514203868
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0007552870090634441
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37365625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11120345744680851
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/HeraiHench_Marge-Qwen-Math-7B/07f4a9dc-16d7-4b75-922f-09f8e9ebed7d.json b/leaderboard_data/HFOpenLLMv2/alibaba/HeraiHench_Marge-Qwen-Math-7B/07f4a9dc-16d7-4b75-922f-09f8e9ebed7d.json
deleted file mode 100644
index 999966456f9658eec6545a41b30c1fcaf6aac8a2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/HeraiHench_Marge-Qwen-Math-7B/07f4a9dc-16d7-4b75-922f-09f8e9ebed7d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HeraiHench_Marge-Qwen-Math-7B/1762652579.6397812",
- "retrieved_timestamp": "1762652579.639782",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HeraiHench/Marge-Qwen-Math-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "HeraiHench/Marge-Qwen-Math-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12622175826806206
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3068846024368302
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.005287009063444109
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23909395973154363
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39390624999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10555186170212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-IRPO-1epoch/0cbb4771-926d-4cf6-a78b-a5f4ac4d5902.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-IRPO-1epoch/0cbb4771-926d-4cf6-a78b-a5f4ac4d5902.json
deleted file mode 100644
index e76df15df62873caff85767da35a89658df19a26..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-IRPO-1epoch/0cbb4771-926d-4cf6-a78b-a5f4ac4d5902.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-IRPO-1epoch/1762652579.652392",
- "retrieved_timestamp": "1762652579.6523929",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen-0.5B-IRPO-1epoch",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen-0.5B-IRPO-1epoch"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25891301746033857
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31638216610052033
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03172205438066465
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24664429530201343
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3286354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15001662234042554
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-IRPO-5epoch/301f71c8-fc1f-42e8-9029-f9d03574872b.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-IRPO-5epoch/301f71c8-fc1f-42e8-9029-f9d03574872b.json
deleted file mode 100644
index 8d27e3a66a19bd2c14c1ac7e1cacd5badc8fe9bf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-IRPO-5epoch/301f71c8-fc1f-42e8-9029-f9d03574872b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-IRPO-5epoch/1762652579.652645",
- "retrieved_timestamp": "1762652579.652645",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen-0.5B-IRPO-5epoch",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen-0.5B-IRPO-5epoch"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24867130325314607
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31891656220326015
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0324773413897281
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23993288590604026
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32866666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1506815159574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-eDPO-1epoch/65e2f2b2-cb5b-40f3-b23a-8c0d185de219.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-eDPO-1epoch/65e2f2b2-cb5b-40f3-b23a-8c0d185de219.json
deleted file mode 100644
index 1e4486fdd78393f6c15b2cc545eb1392d1aa358a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-eDPO-1epoch/65e2f2b2-cb5b-40f3-b23a-8c0d185de219.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-eDPO-1epoch/1762652579.652854",
- "retrieved_timestamp": "1762652579.6528552",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen-0.5B-eDPO-1epoch",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen-0.5B-eDPO-1epoch"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26233504878167707
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3180637583450692
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03474320241691843
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2424496644295302
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33269791666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15525265957446807
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-eDPO-5epoch/062a1dcd-2553-4657-8f89-a481ff62a193.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-eDPO-5epoch/062a1dcd-2553-4657-8f89-a481ff62a193.json
deleted file mode 100644
index df89901718076059c78e657103082cc36e82f043..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen-0.5B-eDPO-5epoch/062a1dcd-2553-4657-8f89-a481ff62a193.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen-0.5B-eDPO-5epoch/1762652579.653099",
- "retrieved_timestamp": "1762652579.6531",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen-0.5B-eDPO-5epoch",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen-0.5B-eDPO-5epoch"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24774708883540117
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3096491823869347
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.023413897280966767
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24916107382550334
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3326354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15226063829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/82b47608-08b5-4368-bead-aa117736c06d.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/82b47608-08b5-4368-bead-aa117736c06d.json
deleted file mode 100644
index 08a4688604ac48fa76976776a5894fb868e3305a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/82b47608-08b5-4368-bead-aa117736c06d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IPO_5e-7-1ep_0alp_0lam/1762652579.680979",
- "retrieved_timestamp": "1762652579.68098",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-IPO_5e-7-1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2573892826589006
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3279091360416723
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.055891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31685416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16505984042553193
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/747310d0-7c30-4261-b2e8-a783d8753e9a.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/747310d0-7c30-4261-b2e8-a783d8753e9a.json
deleted file mode 100644
index d7d16f9d34f0a705607a1a903b49bbde857c9d5c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/747310d0-7c30-4261-b2e8-a783d8753e9a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IPO_5e-7-3ep_0alp_0lam/1762652579.6812391",
- "retrieved_timestamp": "1762652579.68124",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-IPO_5e-7-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3072481017034801
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32638442794247285
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0581570996978852
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25671140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31564583333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1624002659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/a7b6a07a-70fc-4d34-9a92-265b848d22d7.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/a7b6a07a-70fc-4d34-9a92-265b848d22d7.json
deleted file mode 100644
index 8930e1edd1b8bc87b15eda0d2e606e9ad7afa5f3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/a7b6a07a-70fc-4d34-9a92-265b848d22d7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam/1762652579.68145",
- "retrieved_timestamp": "1762652579.68145",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-IRPO_1e-6-3ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25509093649294984
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3242353334886223
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04682779456193353
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31825
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15741356382978725
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/99139c71-a4f2-45d7-95b8-a8b7720681aa.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/99139c71-a4f2-45d7-95b8-a8b7720681aa.json
deleted file mode 100644
index 04674614582d4596efac27997800a8aff804f12c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/99139c71-a4f2-45d7-95b8-a8b7720681aa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam/1762652579.681671",
- "retrieved_timestamp": "1762652579.681671",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-IRPO_1e-7-3ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26358395723347383
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3198054258965539
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32615625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15857712765957446
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/6407040d-023d-476a-ac79-ef85e104eace.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/6407040d-023d-476a-ac79-ef85e104eace.json
deleted file mode 100644
index 07f30a24f719d4dcb0c4535e0444d32f8a25538b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/6407040d-023d-476a-ac79-ef85e104eace.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam/1762652579.681885",
- "retrieved_timestamp": "1762652579.681886",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-1ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23228478215579107
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3254731912466387
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03700906344410876
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25083892617449666
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31688541666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16115359042553193
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/64f71756-0a54-4a42-a96a-7056071c7dd0.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/64f71756-0a54-4a42-a96a-7056071c7dd0.json
deleted file mode 100644
index 2b8a74b303fa653d0d2365af83601420ffeeff95..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/64f71756-0a54-4a42-a96a-7056071c7dd0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam/1762652579.682102",
- "retrieved_timestamp": "1762652579.682102",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-2ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24137732328000816
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3314225693635648
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03474320241691843
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2516778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33415625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15317486702127658
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/8c18d418-a0a4-435a-b31f-7d879c793b4c.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/8c18d418-a0a4-435a-b31f-7d879c793b4c.json
deleted file mode 100644
index 3c08ebb389205119f2237b2e9f72ecd4295aa513..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/8c18d418-a0a4-435a-b31f-7d879c793b4c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam/1762652579.6823108",
- "retrieved_timestamp": "1762652579.6823108",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-IRPO_3e-6-3ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2677805999193252
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3361518077587983
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25419463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33815625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15608377659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/75e153a7-d699-4822-90b6-9d7da259e124.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/75e153a7-d699-4822-90b6-9d7da259e124.json
deleted file mode 100644
index ea88ce1ed2837e6bc3c1d7e024ef53e842fadf10..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/75e153a7-d699-4822-90b6-9d7da259e124.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam/1762652579.682508",
- "retrieved_timestamp": "1762652579.682509",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-IRPO_3e-7-1ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25606501859510544
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3231121828613069
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05362537764350453
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31955208333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1589095744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/836cc2ab-edbc-45fa-af8c-034d0239635b.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/836cc2ab-edbc-45fa-af8c-034d0239635b.json
deleted file mode 100644
index a9670a48ea5d8cf31d4c22d992028605073540c9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/836cc2ab-edbc-45fa-af8c-034d0239635b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam/1762652579.682722",
- "retrieved_timestamp": "1762652579.682723",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-IRPO_3e-7-3ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2639086512675257
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3257435380157632
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04758308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32085416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15866023936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/f270e1bd-7e75-4c6c-a701-9def96275025.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/f270e1bd-7e75-4c6c-a701-9def96275025.json
deleted file mode 100644
index 6f3b3f5c9b196d6b007c6b4df5fafc5d0dd04cc1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/f270e1bd-7e75-4c6c-a701-9def96275025.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam/1762652579.682945",
- "retrieved_timestamp": "1762652579.682946",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-1ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2517686405404327
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213578303108222
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05740181268882175
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31688541666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1584940159574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/02ec1b4f-f1e0-4c46-bff2-1475e95cff80.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/02ec1b4f-f1e0-4c46-bff2-1475e95cff80.json
deleted file mode 100644
index 26ec77b4e530389e2b0cc1d211514918db28e5ce..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/02ec1b4f-f1e0-4c46-bff2-1475e95cff80.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam/1762652579.683157",
- "retrieved_timestamp": "1762652579.683158",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-2ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24382527249919106
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3266053460297184
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.061933534743202415
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31955208333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15541888297872342
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/9da4a976-09a2-4f1c-a15e-d498a2adfdd4.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/9da4a976-09a2-4f1c-a15e-d498a2adfdd4.json
deleted file mode 100644
index 34dc60ea47f11e66ba844d3ee709967c5ea22ae3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/9da4a976-09a2-4f1c-a15e-d498a2adfdd4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam/1762652579.6833699",
- "retrieved_timestamp": "1762652579.683371",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-IRPO_5e-7-3ep_1alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24654804806801509
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32458923603023143
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.052870090634441085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31821875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15633311170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/c3a945da-be07-4132-b558-f20202530b4d.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/c3a945da-be07-4132-b558-f20202530b4d.json
deleted file mode 100644
index 1875dd621648fc8469fdb6dfb2dc9d672037bd10..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/c3a945da-be07-4132-b558-f20202530b4d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam/1762652579.683736",
- "retrieved_timestamp": "1762652579.683738",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-MDPO_0.1_3e-6-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2505695997730466
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32614538576285174
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04984894259818731
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28187919463087246
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33818750000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15217752659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/723afa16-d986-421c-a6ec-d1b00cb9d765.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/723afa16-d986-421c-a6ec-d1b00cb9d765.json
deleted file mode 100644
index 307700f278ad13e0560d78a2bf90319701cea4f5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/723afa16-d986-421c-a6ec-d1b00cb9d765.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam/1762652579.684093",
- "retrieved_timestamp": "1762652579.684094",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-MDPO_0.1_5e-7-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24567370133468086
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179765517720094
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03474320241691843
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3315208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15658244680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/03e5cd5c-adc0-49d8-9e51-3e315d0bffd6.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/03e5cd5c-adc0-49d8-9e51-3e315d0bffd6.json
deleted file mode 100644
index c4ba16fd4d92f0927de4299b61c7c50d2baff3e9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/03e5cd5c-adc0-49d8-9e51-3e315d0bffd6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam/1762652579.684393",
- "retrieved_timestamp": "1762652579.684394",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-MDPO_0.3_3e-6-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24539887498503968
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32157618750132033
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05060422960725076
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33818750000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1544215425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/6992c085-939e-48b0-8c8f-53d6ca9737de.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/6992c085-939e-48b0-8c8f-53d6ca9737de.json
deleted file mode 100644
index 75f084ba17bc0f0dc481c2d10e1e8ebdae5e35e9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/6992c085-939e-48b0-8c8f-53d6ca9737de.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam/1762652579.684617",
- "retrieved_timestamp": "1762652579.684618",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-MDPO_0.3_5e-7-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2341830786756916
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3189252460411593
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04003021148036254
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33015625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15799534574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/59e7ed2b-8385-4c83-b357-6dfa52e429cc.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/59e7ed2b-8385-4c83-b357-6dfa52e429cc.json
deleted file mode 100644
index 8f1441b72aa1649b0e84235debf4c3cde465b383..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/59e7ed2b-8385-4c83-b357-6dfa52e429cc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam/1762652579.684837",
- "retrieved_timestamp": "1762652579.684837",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-MDPO_0.5_1e-5-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23196008812173918
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3233548545784329
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03927492447129909
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33688541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15425531914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/495ed31f-9cbc-4f6f-b4be-2b9ee8f5011c.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/495ed31f-9cbc-4f6f-b4be-2b9ee8f5011c.json
deleted file mode 100644
index bb3db6056c0d4d1c325f223c3cf80cff86a40eae..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/495ed31f-9cbc-4f6f-b4be-2b9ee8f5011c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam/1762652579.6850612",
- "retrieved_timestamp": "1762652579.685062",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24175188499847072
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3175499101875348
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04229607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3288229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15799534574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/6c5809dc-67b3-4567-8d1f-4a8104a11507.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/6c5809dc-67b3-4567-8d1f-4a8104a11507.json
deleted file mode 100644
index 1f5f59c0f196a96c13879714bd75535cc392761e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/6c5809dc-67b3-4567-8d1f-4a8104a11507.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam/1762652579.6852841",
- "retrieved_timestamp": "1762652579.685285",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-2ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24932069132124984
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196623899087389
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04229607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33148958333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15708111702127658
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2Model",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/44c78761-2672-49c4-85f4-9b0d575dd914.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/44c78761-2672-49c4-85f4-9b0d575dd914.json
deleted file mode 100644
index e3f25fc2be225645f83989aa9a1679091c2f1aa4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/44c78761-2672-49c4-85f4-9b0d575dd914.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam/1762652579.685507",
- "retrieved_timestamp": "1762652579.685508",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-MDPO_0.5_3e-7-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2520434668900739
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3197552188491219
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04229607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3261875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15508643617021275
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/b33d4765-4633-4c2b-a118-1ed82b0c842b.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/b33d4765-4633-4c2b-a118-1ed82b0c842b.json
deleted file mode 100644
index 6e4a9a9784f7f74e5ae7a437a36cb376de546ee9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/b33d4765-4633-4c2b-a118-1ed82b0c842b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam/1762652579.685728",
- "retrieved_timestamp": "1762652579.685728",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-MDPO_0.5_4e-6-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25803867072700437
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3248229336342538
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04758308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34215625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15392287234042554
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/8d200434-ef84-403e-9fb6-86c15c4ccfed.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/8d200434-ef84-403e-9fb6-86c15c4ccfed.json
deleted file mode 100644
index 4f350407669592585bf42e3af5eed5a11e2d69e7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/8d200434-ef84-403e-9fb6-86c15c4ccfed.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam/1762652579.685941",
- "retrieved_timestamp": "1762652579.685942",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-MDPO_0.5_6e-6-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23196008812173918
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.326545450978746
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03851963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27097315436241615
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33948958333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15367353723404256
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/3a666f3f-f2ea-4fed-b2fe-750b759eae7a.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/3a666f3f-f2ea-4fed-b2fe-750b759eae7a.json
deleted file mode 100644
index 389fd204bf3cafa358e614cc53943d3ec6a1e88e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/3a666f3f-f2ea-4fed-b2fe-750b759eae7a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam/1762652579.686151",
- "retrieved_timestamp": "1762652579.686152",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-6-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2487710386219675
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3272739110084265
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04607250755287009
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33415625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15309175531914893
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/7fbad2de-a9da-4962-ae18-47298811ba5b.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/7fbad2de-a9da-4962-ae18-47298811ba5b.json
deleted file mode 100644
index f6c5e30ce45d0f49f649a91261da8d79692a38a9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/7fbad2de-a9da-4962-ae18-47298811ba5b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam/1762652579.686357",
- "retrieved_timestamp": "1762652579.686357",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-MDPO_0.5_7e-7-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25236816092412573
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3129690310926447
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0445619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32885416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15641622340425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/1fad00cf-e472-42dc-8b87-a0501cb051ab.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/1fad00cf-e472-42dc-8b87-a0501cb051ab.json
deleted file mode 100644
index 6050cbf109bb29e66f27cbfe162c53d2dc0003a8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/1fad00cf-e472-42dc-8b87-a0501cb051ab.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam/1762652579.686578",
- "retrieved_timestamp": "1762652579.686579",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-MDPO_0.7_3e-6-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2513940788219702
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.322095658026178
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04380664652567976
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33148958333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15383976063829788
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/c68fad94-ce6a-4053-b991-2c1e660fe7d9.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/c68fad94-ce6a-4053-b991-2c1e660fe7d9.json
deleted file mode 100644
index 45617c92e15f2d4a9e6c7651f2d15d72d7bb9ffb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/c68fad94-ce6a-4053-b991-2c1e660fe7d9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam/1762652579.686833",
- "retrieved_timestamp": "1762652579.6868339",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-MDPO_0.7_5e-7-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24567370133468086
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3180087717709833
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03851963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3275208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15724734042553193
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/a6a3ee79-a93b-4220-ac09-1c5d2f70cdf8.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/a6a3ee79-a93b-4220-ac09-1c5d2f70cdf8.json
deleted file mode 100644
index cbf51fc8d146d9a2b09d54ee2b4dae463673e658..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/a6a3ee79-a93b-4220-ac09-1c5d2f70cdf8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam/1762652579.6870458",
- "retrieved_timestamp": "1762652579.687047",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-MDPO_0.9_5e-7-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26363382491788456
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31806866682195567
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04758308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3235208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15741356382978725
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/e3471a51-fad2-44cf-bd0c-ad1250d22f83.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/e3471a51-fad2-44cf-bd0c-ad1250d22f83.json
deleted file mode 100644
index 41b7c558530830ecf0c22c4401fcce54139dc230..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/e3471a51-fad2-44cf-bd0c-ad1250d22f83.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_3e-6-1ep_3vpo_const/1762652579.6873431",
- "retrieved_timestamp": "1762652579.687347",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-VDPO_3e-6-1ep_3vpo_const"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24829674153468353
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3174312444218736
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0377643504531722
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25419463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33279166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1558344414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/5a3a76e9-f93d-435c-898c-b76bc5dc0cda.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/5a3a76e9-f93d-435c-898c-b76bc5dc0cda.json
deleted file mode 100644
index 06bfbe1e5ae5476cf83b7b2c387a933a149b0d7e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/5a3a76e9-f93d-435c-898c-b76bc5dc0cda.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam/1762652579.687733",
- "retrieved_timestamp": "1762652579.687735",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2517686405404327
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3218020653711833
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.052870090634441085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32348958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15949135638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/fc83f198-e606-4c3d-aede-cb646b080b3b.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/fc83f198-e606-4c3d-aede-cb646b080b3b.json
deleted file mode 100644
index cf6e4913a0c77bf2fd452a24d2ed6feff301d599..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/fc83f198-e606-4c3d-aede-cb646b080b3b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_10vpo_const/1762652579.6880698",
- "retrieved_timestamp": "1762652579.688079",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_10vpo_const"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25361706937592254
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3234331515135053
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04909365558912387
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32355208333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15965757978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/e0452e02-8cf3-4da6-83f6-844f1de6fac2.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/e0452e02-8cf3-4da6-83f6-844f1de6fac2.json
deleted file mode 100644
index 9495aa32f15fa46c32082554f6412e7e9aa9c494..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/e0452e02-8cf3-4da6-83f6-844f1de6fac2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_1vpo_const/1762652579.688372",
- "retrieved_timestamp": "1762652579.688373",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_1vpo_const"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24479935460134664
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32395300683134437
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32485416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15866023936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/0792bedd-3891-4622-983b-886c126ace68.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/0792bedd-3891-4622-983b-886c126ace68.json
deleted file mode 100644
index baa74748093212de4c7c9bc14c77e7ae7c9c4810..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/0792bedd-3891-4622-983b-886c126ace68.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-1ep_3vpo_const/1762652579.688585",
- "retrieved_timestamp": "1762652579.688586",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-1ep_3vpo_const"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25046986440422525
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.322699453909483
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04682779456193353
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3209166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1589095744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/31e52020-32b2-4271-89b5-31dfde730404.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/31e52020-32b2-4271-89b5-31dfde730404.json
deleted file mode 100644
index 66e9a85bbf868ec5cbc95330d439582a176c98d7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/31e52020-32b2-4271-89b5-31dfde730404.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam/1762652579.6888041",
- "retrieved_timestamp": "1762652579.688805",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24719743613611883
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.325505796038594
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04984894259818731
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32079166666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15866023936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/06074d49-defe-4303-9899-18f074a06935.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/06074d49-defe-4303-9899-18f074a06935.json
deleted file mode 100644
index d6fc40f2651894d1071300aaa34affdb4d37c65b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/06074d49-defe-4303-9899-18f074a06935.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_1vpo_const/1762652579.689013",
- "retrieved_timestamp": "1762652579.689014",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_1vpo_const"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24165214962964932
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3255889369754366
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0581570996978852
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32745833333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15625
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/1ef0a501-863d-49dc-9bda-5151fb161b41.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/1ef0a501-863d-49dc-9bda-5151fb161b41.json
deleted file mode 100644
index c1c7bef9282f99c44596177c515d840d48391e64..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/1ef0a501-863d-49dc-9bda-5151fb161b41.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VDPO_5e-7-3ep_3vpo_const/1762652579.689225",
- "retrieved_timestamp": "1762652579.689225",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-VDPO_5e-7-3ep_3vpo_const"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2526928549581776
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32354099176995715
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05362537764350453
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32348958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15799534574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/15177605-2eea-4d8a-8462-7b64f7d29071.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/15177605-2eea-4d8a-8462-7b64f7d29071.json
deleted file mode 100644
index 0efefab1c44a773236190a78b8d1c4a4fade8dde..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/15177605-2eea-4d8a-8462-7b64f7d29071.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam/1762652579.68944",
- "retrieved_timestamp": "1762652579.689441",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26685638550158025
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3313735254746672
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07099697885196375
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3168229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16339760638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/09996570-4086-46c5-900e-887c3d5d5826.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/09996570-4086-46c5-900e-887c3d5d5826.json
deleted file mode 100644
index 05b7348577a2bc332dbdc8b954755a7794a994cb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/09996570-4086-46c5-900e-887c3d5d5826.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_10vpo_const/1762652579.689661",
- "retrieved_timestamp": "1762652579.689662",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_10vpo_const"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.270228549138508
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3299802970903615
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07401812688821752
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32079166666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1634807180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/8a24b990-24f1-46f6-a4f9-4ecaa39b4ec7.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/8a24b990-24f1-46f6-a4f9-4ecaa39b4ec7.json
deleted file mode 100644
index 68cc1ff0ebca85a08176cf2971c6383103ed77df..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/8a24b990-24f1-46f6-a4f9-4ecaa39b4ec7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_1vpo_const/1762652579.689882",
- "retrieved_timestamp": "1762652579.689883",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_1vpo_const"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24802191518504235
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33086196042215565
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3208229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16489361702127658
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/ac310031-4080-4124-a858-e1293532b222.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/ac310031-4080-4124-a858-e1293532b222.json
deleted file mode 100644
index 4692c9c690cc1c976538e66a4245c05f47f3d1a8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/ac310031-4080-4124-a858-e1293532b222.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_30vpo_const/1762652579.690102",
- "retrieved_timestamp": "1762652579.690103",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_30vpo_const"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26223531341285566
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3281993681712964
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07401812688821752
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.322125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16339760638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/75a8a0dd-e64d-4462-b8be-8006f6710653.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/75a8a0dd-e64d-4462-b8be-8006f6710653.json
deleted file mode 100644
index 4e8e153d1ed178b5c4b61b03b4d37809352015b1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/75a8a0dd-e64d-4462-b8be-8006f6710653.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-1ep_3vpo_const/1762652579.690311",
- "retrieved_timestamp": "1762652579.690312",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-1ep_3vpo_const"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2608611816646498
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32980236442597805
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0649546827794562
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31679166666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1651429521276596
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/8469a871-39e1-4b21-bb7c-fa21026a01ba.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/8469a871-39e1-4b21-bb7c-fa21026a01ba.json
deleted file mode 100644
index 4918c73edb2b4ce384e2cfb8294a6b3b2cc7e5f5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/8469a871-39e1-4b21-bb7c-fa21026a01ba.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam/1762652579.69052",
- "retrieved_timestamp": "1762652579.690521",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_0alp_0lam"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2930347034756668
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3219547893625387
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06268882175226587
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3115833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1590757978723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/046380aa-08bf-4d95-a4cc-bbfaf30eb56b.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/046380aa-08bf-4d95-a4cc-bbfaf30eb56b.json
deleted file mode 100644
index 33440eb6c11054258011e9337fd553d77e63c402..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/046380aa-08bf-4d95-a4cc-bbfaf30eb56b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_10vpo_const/1762652579.690735",
- "retrieved_timestamp": "1762652579.690736",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_10vpo_const"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28813880503730105
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32553831509236264
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07250755287009064
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31024999999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15816156914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/fa8ee240-a7ac-4edc-9ac7-beabf38af0fa.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/fa8ee240-a7ac-4edc-9ac7-beabf38af0fa.json
deleted file mode 100644
index bd63ff5d15bf1446a9e851c9b647295de4d2b672..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/fa8ee240-a7ac-4edc-9ac7-beabf38af0fa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_1vpo_const/1762652579.690953",
- "retrieved_timestamp": "1762652579.690954",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_1vpo_const"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2887383254209941
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3237016212336586
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07477341389728097
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31425
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16090425531914893
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/6d30ee72-d0ea-496d-8375-892968c8602e.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/6d30ee72-d0ea-496d-8375-892968c8602e.json
deleted file mode 100644
index 400459f84dfcef3735d5b02306aff6131d026689..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/6d30ee72-d0ea-496d-8375-892968c8602e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_30vpo_const/1762652579.691165",
- "retrieved_timestamp": "1762652579.691166",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_30vpo_const"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2905368865720732
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3254390641560331
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0770392749244713
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3129166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15741356382978725
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/903b0e99-e50a-4afa-8085-1fd01872c048.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/903b0e99-e50a-4afa-8085-1fd01872c048.json
deleted file mode 100644
index ca90d0f7e50450c087a6d0c59a2bb6a2657e9e10..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/903b0e99-e50a-4afa-8085-1fd01872c048.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-VIPO_5e-7-3ep_3vpo_const/1762652579.691372",
- "retrieved_timestamp": "1762652579.691373",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-VIPO_5e-7-3ep_3vpo_const"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2904870188876625
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32381698216947513
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0702416918429003
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30894791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15915890957446807
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/225277d4-e1b9-4992-8e2d-678ac6157b06.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/225277d4-e1b9-4992-8e2d-678ac6157b06.json
deleted file mode 100644
index 9d7e82cfddc7061f1e003d56321f016ab80a2483..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/225277d4-e1b9-4992-8e2d-678ac6157b06.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1/1762652579.691587",
- "retrieved_timestamp": "1762652579.691587",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23925406809487715
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3244192088381941
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3221875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1573304521276596
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/4991436d-59fd-4f66-b588-9103beeeba5f.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/4991436d-59fd-4f66-b588-9103beeeba5f.json
deleted file mode 100644
index 78cb2b0dfbee114473a623a3814e1cd084cf27de..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/4991436d-59fd-4f66-b588-9103beeeba5f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3/1762652579.691787",
- "retrieved_timestamp": "1762652579.691788",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-cDPO_5e-7-3ep_0vpo_const_0.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24747226248576
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32090616030928304
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04607250755287009
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3275208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1566655585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/6118242a-de0a-4734-979d-86f2cc6fc65c.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/6118242a-de0a-4734-979d-86f2cc6fc65c.json
deleted file mode 100644
index dc4847cab285fb664f9b9973265249762fc2bebf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/6118242a-de0a-4734-979d-86f2cc6fc65c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1/1762652579.691988",
- "retrieved_timestamp": "1762652579.691989",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-rDPO_3e-6-1ep_0vpo_const_0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.232135179102559
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32779679775418075
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04758308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3021875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14960106382978725
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/a6b71abf-7ee1-438b-8218-98803bca8de8.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/a6b71abf-7ee1-438b-8218-98803bca8de8.json
deleted file mode 100644
index 3aa5ea362ecfffc65d160f2db559afc67d6cfa15..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/a6b71abf-7ee1-438b-8218-98803bca8de8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1/1762652579.6921952",
- "retrieved_timestamp": "1762652579.6921952",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2541667220752049
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3253117533747236
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.052870090634441085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.318125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16090425531914893
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/f7fb8d6b-9773-42e7-a426-a35a401f689a.json b/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/f7fb8d6b-9773-42e7-a426-a35a401f689a.json
deleted file mode 100644
index dc6b2222709738a128011de32b832ccf559de503..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/f7fb8d6b-9773-42e7-a426-a35a401f689a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JayHyeon_Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3/1762652579.6924422",
- "retrieved_timestamp": "1762652579.692443",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JayHyeon/Qwen_0.5-rDPO_5e-7-3ep_0vpo_const_0.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.273875539125077
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3245102552473828
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04607250755287009
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25083892617449666
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3089166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15965757978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/JungZoona_T3Q-qwen2.5-14b-v1.0-e3/eb7694ce-6fe4-4bb0-bcab-266ccc71f78a.json b/leaderboard_data/HFOpenLLMv2/alibaba/JungZoona_T3Q-qwen2.5-14b-v1.0-e3/eb7694ce-6fe4-4bb0-bcab-266ccc71f78a.json
deleted file mode 100644
index 62a54f4dc13b7a5542e253e113d9422b23c1ec96..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/JungZoona_T3Q-qwen2.5-14b-v1.0-e3/eb7694ce-6fe4-4bb0-bcab-266ccc71f78a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/JungZoona_T3Q-qwen2.5-14b-v1.0-e3/1762652579.697056",
- "retrieved_timestamp": "1762652579.697057",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "JungZoona/T3Q-qwen2.5-14b-v1.0-e3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "JungZoona/T3Q-qwen2.5-14b-v1.0-e3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.732396707403024
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7585971930826706
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2862537764350453
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41694630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5911041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5884308510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Junhoee_Qwen-Megumin/0f231e27-deec-4b10-a995-d493ecf8400f.json b/leaderboard_data/HFOpenLLMv2/alibaba/Junhoee_Qwen-Megumin/0f231e27-deec-4b10-a995-d493ecf8400f.json
deleted file mode 100644
index e8a7e939d9c2a65c75ed0673611a3c761865190f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Junhoee_Qwen-Megumin/0f231e27-deec-4b10-a995-d493ecf8400f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Junhoee_Qwen-Megumin/1762652579.69731",
- "retrieved_timestamp": "1762652579.697311",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Junhoee/Qwen-Megumin",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Junhoee/Qwen-Megumin"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7141118897857683
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.528526812457251
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4901812688821752
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39803125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41988031914893614
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "?",
- "params_billions": 15.231
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_Qwen2.5-0.5b-Test-ft/5a28540f-3a94-478c-84c0-5be8db86328a.json b/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_Qwen2.5-0.5b-Test-ft/5a28540f-3a94-478c-84c0-5be8db86328a.json
deleted file mode 100644
index 358a02790d53afa76c90d25c3be0ffd8bdcf39a6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_Qwen2.5-0.5b-Test-ft/5a28540f-3a94-478c-84c0-5be8db86328a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/KingNish_Qwen2.5-0.5b-Test-ft/1762652579.699473",
- "retrieved_timestamp": "1762652579.699473",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "KingNish/Qwen2.5-0.5b-Test-ft",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "KingNish/Qwen2.5-0.5b-Test-ft"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26708134416681073
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3231533857529747
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.035498489425981876
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.342125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16888297872340424
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued-v2.1/f12c6b15-107a-41ed-98fa-40b0af5be42e.json b/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued-v2.1/f12c6b15-107a-41ed-98fa-40b0af5be42e.json
deleted file mode 100644
index 8cdcbe08a3baa1827271395c3be26c84f802c812..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued-v2.1/f12c6b15-107a-41ed-98fa-40b0af5be42e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued-v2.1/1762652579.700618",
- "retrieved_timestamp": "1762652579.700619",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "KingNish/qwen-1b-continued-v2.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "KingNish/qwen-1b-continued-v2.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11268323603594019
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30416583041069006
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.00906344410876133
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41539583333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1278257978723404
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.277
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued-v2.2/cf6aeb1a-4814-41ad-96f5-b59caafb902f.json b/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued-v2.2/cf6aeb1a-4814-41ad-96f5-b59caafb902f.json
deleted file mode 100644
index 2053439a294645b6e419e3926f39064168966b17..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued-v2.2/cf6aeb1a-4814-41ad-96f5-b59caafb902f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued-v2.2/1762652579.7008262",
- "retrieved_timestamp": "1762652579.700827",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "KingNish/qwen-1b-continued-v2.2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "KingNish/qwen-1b-continued-v2.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14125963554479892
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30586579449667844
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015105740181268883
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25671140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35130208333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1262466755319149
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.277
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued-v2/479d9f2a-82f6-42de-b8d6-92405f60638c.json b/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued-v2/479d9f2a-82f6-42de-b8d6-92405f60638c.json
deleted file mode 100644
index 71b1da5f0dd916bb628a3d49f36c8d263c3f4fba..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued-v2/479d9f2a-82f6-42de-b8d6-92405f60638c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued-v2/1762652579.7004201",
- "retrieved_timestamp": "1762652579.700421",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "KingNish/qwen-1b-continued-v2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "KingNish/qwen-1b-continued-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1578711153073844
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31194932022650246
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.010574018126888218
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33927083333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11926529255319149
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.277
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued/a4063b77-fc24-4c9d-bf08-cb28fc6e8259.json b/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued/a4063b77-fc24-4c9d-bf08-cb28fc6e8259.json
deleted file mode 100644
index 0338a38071be2fc3f59333d89df7ad4a1907b9c8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/KingNish_qwen-1b-continued/a4063b77-fc24-4c9d-bf08-cb28fc6e8259.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/KingNish_qwen-1b-continued/1762652579.700214",
- "retrieved_timestamp": "1762652579.700215",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "KingNish/qwen-1b-continued",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "KingNish/qwen-1b-continued"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12547263483113694
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29909543894796364
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.00906344410876133
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38587499999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1260804521276596
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.277
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Kukedlc_Qwen-2.5-7b-Spanish-o1-CoT/c9a159fb-9e6b-49b3-8f2b-a2d2d3ca8f19.json b/leaderboard_data/HFOpenLLMv2/alibaba/Kukedlc_Qwen-2.5-7b-Spanish-o1-CoT/c9a159fb-9e6b-49b3-8f2b-a2d2d3ca8f19.json
deleted file mode 100644
index 1a3edf10e0a39d64325b92d9ee580dd078a24161..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Kukedlc_Qwen-2.5-7b-Spanish-o1-CoT/c9a159fb-9e6b-49b3-8f2b-a2d2d3ca8f19.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Kukedlc_Qwen-2.5-7b-Spanish-o1-CoT/1762652579.703295",
- "retrieved_timestamp": "1762652579.703295",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Kukedlc/Qwen-2.5-7b-Spanish-o1-CoT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4210295349672203
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5601947823443537
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726586102719033
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4776770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4363364361702128
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lambent_qwen2.5-reinstruct-alternate-lumen-14B/974e902e-0959-42d0-98f8-288e1a6ce887.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lambent_qwen2.5-reinstruct-alternate-lumen-14B/974e902e-0959-42d0-98f8-288e1a6ce887.json
deleted file mode 100644
index cca382b4f0073666dc440f2076d7c3a93fb2c47f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lambent_qwen2.5-reinstruct-alternate-lumen-14B/974e902e-0959-42d0-98f8-288e1a6ce887.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lambent_qwen2.5-reinstruct-alternate-lumen-14B/1762652579.707211",
- "retrieved_timestamp": "1762652579.707212",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lambent/qwen2.5-reinstruct-alternate-lumen-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lambent/qwen2.5-reinstruct-alternate-lumen-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47938137475232384
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6458988582965893
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4622356495468278
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3766778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47700000000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.538813164893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/LenguajeNaturalAI_leniachat-qwen2-1.5B-v0/eb6e6d30-b349-447c-83d3-fe7760e83037.json b/leaderboard_data/HFOpenLLMv2/alibaba/LenguajeNaturalAI_leniachat-qwen2-1.5B-v0/eb6e6d30-b349-447c-83d3-fe7760e83037.json
deleted file mode 100644
index e23f36edd740fe787608cd263cec9e14ef324c65..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/LenguajeNaturalAI_leniachat-qwen2-1.5B-v0/eb6e6d30-b349-447c-83d3-fe7760e83037.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LenguajeNaturalAI_leniachat-qwen2-1.5B-v0/1762652579.713998",
- "retrieved_timestamp": "1762652579.713999",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LenguajeNaturalAI/leniachat-qwen2-1.5B-v0",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "LenguajeNaturalAI/leniachat-qwen2-1.5B-v0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22211842356059697
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36835590195612017
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01283987915407855
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3749895833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18799867021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.543
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v3/eb958d5c-aa2e-4640-bef7-c8b10a892847.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v3/eb958d5c-aa2e-4640-bef7-c8b10a892847.json
deleted file mode 100644
index f2981832725c6713e51ff5f67a10fc5e76d81685..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v3/eb958d5c-aa2e-4640-bef7-c8b10a892847.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v3/1762652579.736984",
- "retrieved_timestamp": "1762652579.7369852",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7048697456083193
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6478481476573447
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4161631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38171140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48075
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5393949468085106
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v4/17c5c728-e03d-45e9-aaae-816c4e90b14f.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v4/17c5c728-e03d-45e9-aaae-816c4e90b14f.json
deleted file mode 100644
index 2b2e393a3f38a4f723493058bc5e4c11107e4273..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v4/17c5c728-e03d-45e9-aaae-816c4e90b14f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v4/1762652579.737248",
- "retrieved_timestamp": "1762652579.7372491",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6943033373670748
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6419880364363972
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3466767371601209
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3716442953020134
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.476875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5251828457446809
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v5/79d3d942-8d5f-4aca-8759-8d70b8cfc5f3.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v5/79d3d942-8d5f-4aca-8759-8d70b8cfc5f3.json
deleted file mode 100644
index 1fabf9debc5deefbf10d725f5d8c1e17704456e2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v5/79d3d942-8d5f-4aca-8759-8d70b8cfc5f3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v5/1762652579.737468",
- "retrieved_timestamp": "1762652579.737469",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7485084021507378
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6466679318879384
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43580060422960726
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3624161073825503
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4473020833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5140458776595744
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/92bff089-baed-4f1f-852b-f274a7920a1a.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/92bff089-baed-4f1f-852b-f274a7920a1a.json
deleted file mode 100644
index d5494427f7c5550369c0d06492a6c1b1b2044667..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/92bff089-baed-4f1f-852b-f274a7920a1a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt/1762652579.7379",
- "retrieved_timestamp": "1762652579.7379",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6-cpt"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46634152936430895
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6214839063250638
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33157099697885195
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37583892617449666
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49373958333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5204454787234043
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6/c4b27a1b-28dd-4a79-839c-ad8673034937.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6/c4b27a1b-28dd-4a79-839c-ad8673034937.json
deleted file mode 100644
index 4a0184237be0dbcfb91e1ccfc15cd90930e64ec8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6/c4b27a1b-28dd-4a79-839c-ad8673034937.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v6/1762652579.737686",
- "retrieved_timestamp": "1762652579.737687",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v6"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.704320092909037
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6457646219275207
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3957703927492447
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3775167785234899
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47678125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5392287234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/46a21741-1860-4498-8284-c94fccad1ed0.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/46a21741-1860-4498-8284-c94fccad1ed0.json
deleted file mode 100644
index f7a55eb21a7c7ab73a5b633ab4d79eaffe5f4cb4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/46a21741-1860-4498-8284-c94fccad1ed0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase/1762652579.738374",
- "retrieved_timestamp": "1762652579.7383769",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7-rebase"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.693054428915278
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6422587980411637
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3406344410876133
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.375
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48881250000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5276761968085106
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7/d540acde-9601-4119-8ae2-f7cdf82f43f7.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7/d540acde-9601-4119-8ae2-f7cdf82f43f7.json
deleted file mode 100644
index 039af056d58570f201d445682d6f5a9d3e0d012f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7/d540acde-9601-4119-8ae2-f7cdf82f43f7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v7/1762652579.738115",
- "retrieved_timestamp": "1762652579.738116",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v7"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6793906833867471
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.653127892154805
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41012084592145015
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37919463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4833854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5375664893617021
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.5/c723fc6f-2656-4084-81d0-4cbaf0587049.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.5/c723fc6f-2656-4084-81d0-4cbaf0587049.json
deleted file mode 100644
index 881b5f514d56f442979ad06f184714a504097e11..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.5/c723fc6f-2656-4084-81d0-4cbaf0587049.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.5/1762652579.738977",
- "retrieved_timestamp": "1762652579.7389781",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5928624937388352
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6451310724242122
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36555891238670696
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3800335570469799
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47696875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5290059840425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.6/526f6468-b7a8-47a7-9ed4-c2aa7cc63ca1.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.6/526f6468-b7a8-47a7-9ed4-c2aa7cc63ca1.json
deleted file mode 100644
index f55560c5a9e25d40c27d2b87db7d8b25661b0d9a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.6/526f6468-b7a8-47a7-9ed4-c2aa7cc63ca1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.6/1762652579.7392142",
- "retrieved_timestamp": "1762652579.7392151",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.6"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5919382793210903
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6457173605698173
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4070996978851964
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38422818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49532291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5399767287234043
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.7/56232cf6-7ee7-45ed-b139-ea20e148b5fa.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.7/56232cf6-7ee7-45ed-b139-ea20e148b5fa.json
deleted file mode 100644
index 17fbc40a7c6eccf1ea118d9bff069aadb6c3c47b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.7/56232cf6-7ee7-45ed-b139-ea20e148b5fa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.7/1762652579.7395148",
- "retrieved_timestamp": "1762652579.739517",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.7"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7874761189200211
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6482757721443902
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.540785498489426
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35151006711409394
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4380625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.524185505319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.8/51ff4f00-1d21-4f98-b5a3-7a72c4b2a5b1.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.8/51ff4f00-1d21-4f98-b5a3-7a72c4b2a5b1.json
deleted file mode 100644
index f4a9cfa1c76e7f01304745fb5881dd070c99285c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.8/51ff4f00-1d21-4f98-b5a3-7a72c4b2a5b1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.8/1762652579.739795",
- "retrieved_timestamp": "1762652579.739796",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.8"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7027963581075989
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6565626437486437
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42371601208459214
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37583892617449666
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4911979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5323304521276596
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.9/eee0ebda-6ff8-45bd-ac4e-15aeb724d0d1.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.9/eee0ebda-6ff8-45bd-ac4e-15aeb724d0d1.json
deleted file mode 100644
index 88ff0231d990d6b0b189f750b3c0716cbfb8cc4d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.9/eee0ebda-6ff8-45bd-ac4e-15aeb724d0d1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8.9/1762652579.74003",
- "retrieved_timestamp": "1762652579.740031",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8.9"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7993413032974729
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6483097746745584
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5370090634441088
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3296979865771812
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43282291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5199468085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8/b3e7af18-231e-4839-809c-bc5bfe7b4182.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8/b3e7af18-231e-4839-809c-bc5bfe7b4182.json
deleted file mode 100644
index 54d8a94add53aee34f1f91d4b9f827293854ac48..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8/b3e7af18-231e-4839-809c-bc5bfe7b4182.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v8/1762652579.738731",
- "retrieved_timestamp": "1762652579.738732",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v8"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7874761189200211
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6419472828128271
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5558912386706949
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33557046979865773
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43936458333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5206117021276596
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/757269fe-8662-4eaa-8e76-5c2f88d8fbb0.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/757269fe-8662-4eaa-8e76-5c2f88d8fbb0.json
deleted file mode 100644
index 3ed5139c44ddbcc1cb534736333f4e4b95dca93b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/757269fe-8662-4eaa-8e76-5c2f88d8fbb0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9-stock/1762652579.740509",
- "retrieved_timestamp": "1762652579.74051",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9-stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6513639365771708
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6570671029574323
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41842900302114805
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38422818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4819583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5412234042553191
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.1/dffd1a4a-a056-43c2-bda3-0cfa21406656.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.1/dffd1a4a-a056-43c2-bda3-0cfa21406656.json
deleted file mode 100644
index 3abcf273a9571909bf875ab90b93a3523dcdea48..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.1/dffd1a4a-a056-43c2-bda3-0cfa21406656.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.1/1762652579.74074",
- "retrieved_timestamp": "1762652579.740741",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8002655177152178
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6554749578648256
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5468277945619335
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34312080536912754
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43539583333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5250997340425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.2/b5ecb480-16e6-4dfb-be77-ad8ef4e90aa3.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.2/b5ecb480-16e6-4dfb-be77-ad8ef4e90aa3.json
deleted file mode 100644
index a19c197d478950bd51db1c904f395347a75dfbbe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.2/b5ecb480-16e6-4dfb-be77-ad8ef4e90aa3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9.2/1762652579.74097",
- "retrieved_timestamp": "1762652579.74097",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7862272104682243
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6537693501484436
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5332326283987915
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35570469798657717
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43809375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5283410904255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9/682a38c6-2fb8-4c42-b6ad-69fbe65be484.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9/682a38c6-2fb8-4c42-b6ad-69fbe65be484.json
deleted file mode 100644
index d8cc77e4192c46bae500c34881b2ef4f8d606d22..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9/682a38c6-2fb8-4c42-b6ad-69fbe65be484.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-MegaFusion-v9/1762652579.740272",
- "retrieved_timestamp": "1762652579.740273",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lunzima/NQLSG-Qwen2.5-14B-MegaFusion-v9"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.523519816309614
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6545588984302916
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43655589123867067
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3884228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4805625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.542220744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-OriginalFusion/cf14f098-cd46-4ca0-acec-02012eb78ea3.json b/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-OriginalFusion/cf14f098-cd46-4ca0-acec-02012eb78ea3.json
deleted file mode 100644
index 97958544db702c59d825de735bd273a307ed7a2b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Lunzima_NQLSG-Qwen2.5-14B-OriginalFusion/cf14f098-cd46-4ca0-acec-02012eb78ea3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Lunzima_NQLSG-Qwen2.5-14B-OriginalFusion/1762652579.741195",
- "retrieved_timestamp": "1762652579.741195",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Lunzima/NQLSG-Qwen2.5-14B-OriginalFusion"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6141947809589667
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6592166466793806
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42749244712990936
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.51215625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5238530585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Marsouuu_MiniQwenMathExpert-ECE-PRYMMAL-Martial/f1b6c510-02fe-4ffd-96da-4cfcfb04eb8c.json b/leaderboard_data/HFOpenLLMv2/alibaba/Marsouuu_MiniQwenMathExpert-ECE-PRYMMAL-Martial/f1b6c510-02fe-4ffd-96da-4cfcfb04eb8c.json
deleted file mode 100644
index ee067b6926092a91dbbfad2b8767933dfb262e4c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Marsouuu_MiniQwenMathExpert-ECE-PRYMMAL-Martial/f1b6c510-02fe-4ffd-96da-4cfcfb04eb8c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Marsouuu_MiniQwenMathExpert-ECE-PRYMMAL-Martial/1762652579.747411",
- "retrieved_timestamp": "1762652579.747412",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Marsouuu/MiniQwenMathExpert-ECE-PRYMMAL-Martial"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2794961812435449
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42301343044108936
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11404833836858005
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28187919463087246
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38673958333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2922207446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_Qwen1.5-MoE-A2.7B-Wikihow/ee23e137-57d2-49aa-b267-27bd48457d46.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_Qwen1.5-MoE-A2.7B-Wikihow/ee23e137-57d2-49aa-b267-27bd48457d46.json
deleted file mode 100644
index 3795e62912a1b802a0a755f05908ff273c1b1fe1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_Qwen1.5-MoE-A2.7B-Wikihow/ee23e137-57d2-49aa-b267-27bd48457d46.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_Qwen1.5-MoE-A2.7B-Wikihow/1762652579.750923",
- "retrieved_timestamp": "1762652579.750923",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/Qwen1.5-MoE-A2.7B-Wikihow"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29543278501043896
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3920071454890602
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0823262839879154
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35021875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23803191489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2MoeForCausalLM",
- "params_billions": 14.316
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.1-qwen2-72b/ae68a60d-a2df-45f1-b446-1400901cb6ff.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.1-qwen2-72b/ae68a60d-a2df-45f1-b446-1400901cb6ff.json
deleted file mode 100644
index 343294d935ac93df44c7d2ed4a5a55370aa3179f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.1-qwen2-72b/ae68a60d-a2df-45f1-b446-1400901cb6ff.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-qwen2-72b/1762652579.75234",
- "retrieved_timestamp": "1762652579.752341",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-2.1-qwen2-72b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-2.1-qwen2-72b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8162774770941104
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6965560971922596
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4078549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47321875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5414727393617021
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.699
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.1-qwen2-7b/6c31df3b-e408-4a6c-b475-78f174630cad.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.1-qwen2-7b/6c31df3b-e408-4a6c-b475-78f174630cad.json
deleted file mode 100644
index c1ffe918116e207d11e2f38774424344d0d21905..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.1-qwen2-7b/6c31df3b-e408-4a6c-b475-78f174630cad.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-qwen2-7b/1762652579.752553",
- "retrieved_timestamp": "1762652579.752554",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-2.1-qwen2-7b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-2.1-qwen2-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3816119008674761
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5045925887362795
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2311178247734139
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44369791666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3692652925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.1-qwen2.5-72b/2b841a46-6210-4092-875f-ca3ae36f3d25.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.1-qwen2.5-72b/2b841a46-6210-4092-875f-ca3ae36f3d25.json
deleted file mode 100644
index 0f2afb1eed516a9c7670c9e86ee5c531ecf438a6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.1-qwen2.5-72b/2b841a46-6210-4092-875f-ca3ae36f3d25.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.1-qwen2.5-72b/1762652579.752765",
- "retrieved_timestamp": "1762652579.752765",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-2.1-qwen2.5-72b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-2.1-qwen2.5-72b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8662360315075112
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7261624327092416
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5913897280966768
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36325503355704697
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42984375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5619182180851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.7
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.2-qwen2-72b/250897a9-7d48-4323-813d-fa48befe2cbe.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.2-qwen2-72b/250897a9-7d48-4323-813d-fa48befe2cbe.json
deleted file mode 100644
index 59190bae2340fd7b7065891e4afae6b62307374d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.2-qwen2-72b/250897a9-7d48-4323-813d-fa48befe2cbe.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-qwen2-72b/1762652579.753872",
- "retrieved_timestamp": "1762652579.753872",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-2.2-qwen2-72b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-2.2-qwen2-72b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8008151704145002
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6939595229335245
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45317220543806647
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37416107382550334
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4508020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.543467420212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.2-qwen2-7b/154b7a41-e1bf-4827-a6a7-279ea170ab7e.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.2-qwen2-7b/154b7a41-e1bf-4827-a6a7-279ea170ab7e.json
deleted file mode 100644
index 0b96a7b144842a75da9955372b7435061fe9349f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.2-qwen2-7b/154b7a41-e1bf-4827-a6a7-279ea170ab7e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-qwen2-7b/1762652579.7540858",
- "retrieved_timestamp": "1762652579.754087",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-2.2-qwen2-7b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-2.2-qwen2-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35972996094806226
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5214913750127922
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21450151057401812
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43582291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3898769946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.2-qwen2.5-72b/1fa2ab02-9a1c-4e7e-95b8-27e78af0ba73.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.2-qwen2.5-72b/1fa2ab02-9a1c-4e7e-95b8-27e78af0ba73.json
deleted file mode 100644
index b07673e588ffa080cd5a7923c0049d72b17b7817..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.2-qwen2.5-72b/1fa2ab02-9a1c-4e7e-95b8-27e78af0ba73.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.2-qwen2.5-72b/1762652579.754294",
- "retrieved_timestamp": "1762652579.754294",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-2.2-qwen2.5-72b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-2.2-qwen2.5-72b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8476763875406145
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7276399007138082
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35906040268456374
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4206666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.561751994680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.7
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.3-qwen2-72b/8b769df2-18f5-4712-a02b-962d3e2bb7c7.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.3-qwen2-72b/8b769df2-18f5-4712-a02b-962d3e2bb7c7.json
deleted file mode 100644
index 7bf9b81d70d4376668475b453d23bd5397f6c2c5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.3-qwen2-72b/8b769df2-18f5-4712-a02b-962d3e2bb7c7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-qwen2-72b/1762652579.755723",
- "retrieved_timestamp": "1762652579.755724",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-2.3-qwen2-72b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-2.3-qwen2-72b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3849840645044039
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6576306700720502
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31722054380664655
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3716442953020134
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4112395833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5418882978723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.3-qwen2-7b/3272e904-21d5-4116-abde-0e74fe48b9d5.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.3-qwen2-7b/3272e904-21d5-4116-abde-0e74fe48b9d5.json
deleted file mode 100644
index fc36c6bb67fe0cf45c93b4ddacd9e16c81d2518d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.3-qwen2-7b/3272e904-21d5-4116-abde-0e74fe48b9d5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.3-qwen2-7b/1762652579.755967",
- "retrieved_timestamp": "1762652579.755968",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-2.3-qwen2-7b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-2.3-qwen2-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3824862476008103
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5064049035932394
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20694864048338368
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4422395833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3611203457446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.4-qwen2-7b/5f54ee4a-42e8-4dd0-88bc-915d2f1971a0.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.4-qwen2-7b/5f54ee4a-42e8-4dd0-88bc-915d2f1971a0.json
deleted file mode 100644
index 22fdb7227e2540238ed9c2adcb6a25feb1bb4c89..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.4-qwen2-7b/5f54ee4a-42e8-4dd0-88bc-915d2f1971a0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.4-qwen2-7b/1762652579.756743",
- "retrieved_timestamp": "1762652579.756744",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-2.4-qwen2-7b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-2.4-qwen2-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32995452067181746
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5101416326251771
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20317220543806647
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44528125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3976894946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.5-qwen2-7b/762f6ff3-4823-4de8-8351-045e1d1d383b.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.5-qwen2-7b/762f6ff3-4823-4de8-8351-045e1d1d383b.json
deleted file mode 100644
index f4eae59e411990062c59c024bb9d57a764d0da1a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.5-qwen2-7b/762f6ff3-4823-4de8-8351-045e1d1d383b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.5-qwen2-7b/1762652579.757269",
- "retrieved_timestamp": "1762652579.75727",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-2.5-qwen2-7b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-2.5-qwen2-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31449221399220734
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4886561146965678
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2258308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45646875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3681848404255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.6-qwen2-7b/65f44cf9-f619-4f43-a03f-09e22386d319.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.6-qwen2-7b/65f44cf9-f619-4f43-a03f-09e22386d319.json
deleted file mode 100644
index 91fe3fabbc34ca65843fb1ef701b9fda8e7fb5e2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.6-qwen2-7b/65f44cf9-f619-4f43-a03f-09e22386d319.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.6-qwen2-7b/1762652579.7575328",
- "retrieved_timestamp": "1762652579.757534",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-2.6-qwen2-7b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-2.6-qwen2-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3442676542684522
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4930243946403894
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1216012084592145
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2843959731543625
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4586145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3731715425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.7-qwen2-7b/f592bc27-c97c-4b14-abcf-30782d8c0056.json b/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.7-qwen2-7b/f592bc27-c97c-4b14-abcf-30782d8c0056.json
deleted file mode 100644
index caf6ae62321cf8d58fc6ba4bc18db6ffdb84676e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/MaziyarPanahi_calme-2.7-qwen2-7b/f592bc27-c97c-4b14-abcf-30782d8c0056.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/MaziyarPanahi_calme-2.7-qwen2-7b/1762652579.757804",
- "retrieved_timestamp": "1762652579.757805",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "MaziyarPanahi/calme-2.7-qwen2-7b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "MaziyarPanahi/calme-2.7-qwen2-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3592301759331906
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4883170901309997
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13821752265861026
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48242708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3705119680851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Minami-su_Amara-o1-7B-Qwen/6910eff9-74bc-46b0-8f8c-20642bef4a12.json b/leaderboard_data/HFOpenLLMv2/alibaba/Minami-su_Amara-o1-7B-Qwen/6910eff9-74bc-46b0-8f8c-20642bef4a12.json
deleted file mode 100644
index ac866ee38f52100f0ea016e4c77f969687c651cb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Minami-su_Amara-o1-7B-Qwen/6910eff9-74bc-46b0-8f8c-20642bef4a12.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Minami-su_Amara-o1-7B-Qwen/1762652579.759999",
- "retrieved_timestamp": "1762652579.76",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Minami-su/Amara-o1-7B-Qwen",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Minami-su/Amara-o1-7B-Qwen"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7389914316236474
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5199420077880453
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5181268882175226
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40066666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4083277925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Minami-su_Amara-o2-7B-Qwen/ebd5da9f-60d5-492e-916b-5e123442316c.json b/leaderboard_data/HFOpenLLMv2/alibaba/Minami-su_Amara-o2-7B-Qwen/ebd5da9f-60d5-492e-916b-5e123442316c.json
deleted file mode 100644
index 574134d0b3214f5a135b36507d6aa96238622e1c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Minami-su_Amara-o2-7B-Qwen/ebd5da9f-60d5-492e-916b-5e123442316c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Minami-su_Amara-o2-7B-Qwen/1762652579.760268",
- "retrieved_timestamp": "1762652579.760268",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Minami-su/Amara-o2-7B-Qwen",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Minami-su/Amara-o2-7B-Qwen"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7146615424850509
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5173432604435285
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4086102719033233
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37809374999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41647273936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Nexesenex_Qwen_2.5_3b_Smarteaz_0.01a/eaf601d2-f285-4b0c-b3ab-5d029b8fe20f.json b/leaderboard_data/HFOpenLLMv2/alibaba/Nexesenex_Qwen_2.5_3b_Smarteaz_0.01a/eaf601d2-f285-4b0c-b3ab-5d029b8fe20f.json
deleted file mode 100644
index 9be688bb7726b425899c95d4c1cc3da56c33b74d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Nexesenex_Qwen_2.5_3b_Smarteaz_0.01a/eaf601d2-f285-4b0c-b3ab-5d029b8fe20f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Nexesenex_Qwen_2.5_3b_Smarteaz_0.01a/1762652579.782197",
- "retrieved_timestamp": "1762652579.782198",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Nexesenex/Qwen_2.5_3b_Smarteaz_0.01a"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4011954946209391
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4636652015725344
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1805135951661631
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43204166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2859873670212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.085
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/NikolaSigmoid_DeepSeek-R1-Distill-Qwen-1.5B-500/c0182d01-454b-4194-be7a-81b9a9672d07.json b/leaderboard_data/HFOpenLLMv2/alibaba/NikolaSigmoid_DeepSeek-R1-Distill-Qwen-1.5B-500/c0182d01-454b-4194-be7a-81b9a9672d07.json
deleted file mode 100644
index 26f772ddb0ffa31e7cc5458b3920326a44fca053..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/NikolaSigmoid_DeepSeek-R1-Distill-Qwen-1.5B-500/c0182d01-454b-4194-be7a-81b9a9672d07.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NikolaSigmoid_DeepSeek-R1-Distill-Qwen-1.5B-500/1762652579.783665",
- "retrieved_timestamp": "1762652579.783666",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "NikolaSigmoid/DeepSeek-R1-Distill-Qwen-1.5B-500"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17485715678843247
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2601595454586609
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24580536912751677
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33796875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1124501329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.157
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/PJMixers-Dev_Qwen2.5-RomboTiesTest-7B/a954be32-0c84-4ffe-9c4f-7f895c77e197.json b/leaderboard_data/HFOpenLLMv2/alibaba/PJMixers-Dev_Qwen2.5-RomboTiesTest-7B/a954be32-0c84-4ffe-9c4f-7f895c77e197.json
deleted file mode 100644
index ad31c0a691bc7eb520f3bbd617e928aedfef9c4f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/PJMixers-Dev_Qwen2.5-RomboTiesTest-7B/a954be32-0c84-4ffe-9c4f-7f895c77e197.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/PJMixers-Dev_Qwen2.5-RomboTiesTest-7B/1762652579.811478",
- "retrieved_timestamp": "1762652579.81148",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "PJMixers-Dev/Qwen2.5-RomboTiesTest-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "PJMixers-Dev/Qwen2.5-RomboTiesTest-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7558023821238757
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5398673461520839
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4962235649546828
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4033645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4285239361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.808
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Pinkstack_PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/7b8f75d1-ef18-4fb4-abbb-efd6147fe74c.json b/leaderboard_data/HFOpenLLMv2/alibaba/Pinkstack_PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/7b8f75d1-ef18-4fb4-abbb-efd6147fe74c.json
deleted file mode 100644
index 4453b50609130a7b6176fbc4642f129806799cef..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Pinkstack_PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/7b8f75d1-ef18-4fb4-abbb-efd6147fe74c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Pinkstack_PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B/1762652579.812139",
- "retrieved_timestamp": "1762652579.812139",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Pinkstack/PARM-V1.5-base-QwQ-Qwen-2.5-o1-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5084819390328772
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47105662040096935
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1691842900302115
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44785416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35106382978723405
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_QwQ-32B-Preview/1326f0c0-9355-47ff-813b-0729370e1487.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_QwQ-32B-Preview/1326f0c0-9355-47ff-813b-0729370e1487.json
deleted file mode 100644
index 7ce95fc928c58d1aa3d40871098518a69102c832..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_QwQ-32B-Preview/1326f0c0-9355-47ff-813b-0729370e1487.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_QwQ-32B-Preview/1762652579.834909",
- "retrieved_timestamp": "1762652579.83491",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/QwQ-32B-Preview",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/QwQ-32B-Preview"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4035437084713006
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6691381482252744
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44939577039274925
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2818791946308725
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4109895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5678191489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_QwQ-32B/788241ad-d975-498e-80ef-b0d04bd8db85.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_QwQ-32B/788241ad-d975-498e-80ef-b0d04bd8db85.json
deleted file mode 100644
index c8c60953c0554741ac5f932a70dd3afd5b6b45d6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_QwQ-32B/788241ad-d975-498e-80ef-b0d04bd8db85.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_QwQ-32B/1762652579.8346298",
- "retrieved_timestamp": "1762652579.834631",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/QwQ-32B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/QwQ-32B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39767372793077926
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29829653176003074
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1608761329305136
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42063541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11959773936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-0.5B/e0115d6b-3b2c-4047-b64c-1e7afb5edd55.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-0.5B/e0115d6b-3b2c-4047-b64c-1e7afb5edd55.json
deleted file mode 100644
index 1a1b64ba364ea9ba1fb81a6b45d5ed5d8f108fd4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-0.5B/e0115d6b-3b2c-4047-b64c-1e7afb5edd55.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-0.5B/1762652579.835391",
- "retrieved_timestamp": "1762652579.835392",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen1.5-0.5B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen1.5-0.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17056077873375977
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3153538659142558
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.017371601208459216
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25419463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36162500000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1307347074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.62
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-1.8B/7c828833-fd36-4a84-8530-d3c1769ca822.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-1.8B/7c828833-fd36-4a84-8530-d3c1769ca822.json
deleted file mode 100644
index 5ad9183697a67d07f794cb5a54045cea14be46c6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-1.8B/7c828833-fd36-4a84-8530-d3c1769ca822.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-1.8B/1762652579.835954",
- "retrieved_timestamp": "1762652579.835955",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen1.5-1.8B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen1.5-1.8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2154239639711521
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3476121558366305
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03172205438066465
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36051041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18816489361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.837
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-110B/29389e2b-7898-4f9f-ba8c-8fe4dad80295.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-110B/29389e2b-7898-4f9f-ba8c-8fe4dad80295.json
deleted file mode 100644
index 036719a30fd863e9bb485070578bfaec0de2660d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-110B/29389e2b-7898-4f9f-ba8c-8fe4dad80295.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-110B/1762652579.836433",
- "retrieved_timestamp": "1762652579.836434",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen1.5-110B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen1.5-110B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3421942667677318
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6099964981780978
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24697885196374622
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3523489932885906
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44084375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5360704787234043
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 111.21
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-14B/9afcb068-65e2-4d4c-b7ee-071eb4dbac73.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-14B/9afcb068-65e2-4d4c-b7ee-071eb4dbac73.json
deleted file mode 100644
index c008ac93017c8c20597b9cf1c2e00b710ff43224..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-14B/9afcb068-65e2-4d4c-b7ee-071eb4dbac73.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-14B/1762652579.836853",
- "retrieved_timestamp": "1762652579.836853",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen1.5-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen1.5-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2905368865720732
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5080327493808331
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20241691842900303
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41864583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36436170212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.167
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-32B/b8cd9221-dd4e-4f49-b03e-f11bdd5773e4.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-32B/b8cd9221-dd4e-4f49-b03e-f11bdd5773e4.json
deleted file mode 100644
index 19bcf18a29eb0253479ad539b69339710488e8db..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-32B/b8cd9221-dd4e-4f49-b03e-f11bdd5773e4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-32B/1762652579.837265",
- "retrieved_timestamp": "1762652579.837266",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen1.5-32B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen1.5-32B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.329729562006587
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5715390555959325
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028700906344411
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3296979865771812
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4277916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4499667553191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.512
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-4B/1e3f60f2-814a-4979-87bd-f5f94d5b09cc.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-4B/1e3f60f2-814a-4979-87bd-f5f94d5b09cc.json
deleted file mode 100644
index 78e6eea27607ea2d8145b5a964fa187fb8c556eb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-4B/1e3f60f2-814a-4979-87bd-f5f94d5b09cc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-4B/1762652579.837696",
- "retrieved_timestamp": "1762652579.837697",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen1.5-4B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen1.5-4B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24447466056729478
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40538970296725463
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.052870090634441085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27684563758389263
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3604479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24601063829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.95
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-7B/102378fc-7b98-4088-a6f5-3039e7b638d5.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-7B/102378fc-7b98-4088-a6f5-3039e7b638d5.json
deleted file mode 100644
index 9eba02630146ba83681112de58a4b21a556e1f4d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-7B/102378fc-7b98-4088-a6f5-3039e7b638d5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-7B/1762652579.838115",
- "retrieved_timestamp": "1762652579.8381162",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen1.5-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen1.5-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684299879874289
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4559896407693445
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09290030211480363
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4103333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29163896276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.721
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-MoE-A2.7B/c6aa0ed8-3b79-4d73-8587-762e9469f4ce.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-MoE-A2.7B/c6aa0ed8-3b79-4d73-8587-762e9469f4ce.json
deleted file mode 100644
index 555eb3cfe436fac2c095f2201adb511d1fc2f669..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen1.5-MoE-A2.7B/c6aa0ed8-3b79-4d73-8587-762e9469f4ce.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen1.5-MoE-A2.7B/1762652579.83854",
- "retrieved_timestamp": "1762652579.83854",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen1.5-MoE-A2.7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen1.5-MoE-A2.7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.265982038768246
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4113515433010766
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09290030211480363
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40134375000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2777593085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2MoeForCausalLM",
- "params_billions": 14.316
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-0.5B/cdf3b683-29d9-45b4-b6a6-1f67927ef953.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-0.5B/cdf3b683-29d9-45b4-b6a6-1f67927ef953.json
deleted file mode 100644
index 4f3d5e4dec002be83a5bc8a95d25b1acb3cdc3bd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-0.5B/cdf3b683-29d9-45b4-b6a6-1f67927ef953.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-0.5B/1762652579.838974",
- "retrieved_timestamp": "1762652579.838975",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2-0.5B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2-0.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18732186154957736
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3239117424825444
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.026435045317220542
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37520833333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17195811170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-1.5B/6eb76673-0633-440b-8849-8fcf8cf00954.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-1.5B/6eb76673-0633-440b-8849-8fcf8cf00954.json
deleted file mode 100644
index c0d00d0d675b637a3328ee4cf2b6d13fe4225c9c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-1.5B/6eb76673-0633-440b-8849-8fcf8cf00954.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-1.5B/1762652579.839384",
- "retrieved_timestamp": "1762652579.839385",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2-1.5B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2-1.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21132705665412216
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35747931720577464
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0702416918429003
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36581250000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2551529255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-57B-A14B/aafb84cd-5950-4b93-98d1-9e50fd294b65.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-57B-A14B/aafb84cd-5950-4b93-98d1-9e50fd294b65.json
deleted file mode 100644
index af612061002c5dcf79ec55d0fb5b92a3c199e611..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-57B-A14B/aafb84cd-5950-4b93-98d1-9e50fd294b65.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-57B-A14B/1762652579.8398201",
- "retrieved_timestamp": "1762652579.839821",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2-57B-A14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2-57B-A14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31126965340851165
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5618204938684165
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1865558912386707
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.417375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4916057180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2MoeForCausalLM",
- "params_billions": 57.409
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-72B/fc683e1a-327f-4a69-bd51-9022c587159b.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-72B/fc683e1a-327f-4a69-bd51-9022c587159b.json
deleted file mode 100644
index f5d39d30b22c8943069abe4cf885afbf1451c303..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-72B/fc683e1a-327f-4a69-bd51-9022c587159b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-72B/1762652579.8402402",
- "retrieved_timestamp": "1762652579.840241",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2-72B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2-72B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3823610243044012
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.661734029856643
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311178247734139
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39429530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47036458333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5730551861702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-7B/196e965c-4570-43aa-ba0d-13972796bda9.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-7B/196e965c-4570-43aa-ba0d-13972796bda9.json
deleted file mode 100644
index 57c3b27a6fdc0fbf158f41ba9a371d6e1509ae30..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-7B/196e965c-4570-43aa-ba0d-13972796bda9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-7B/1762652579.840696",
- "retrieved_timestamp": "1762652579.840696",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3148667757106699
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.531531595001889
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2039274924471299
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4439166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41830119680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-Math-7B/fe474496-4efa-4ef7-844d-32b17abda7c8.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-Math-7B/fe474496-4efa-4ef7-844d-32b17abda7c8.json
deleted file mode 100644
index be65f27cf72ff35f3b14c9568cdafbebacf39366..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2-Math-7B/fe474496-4efa-4ef7-844d-32b17abda7c8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2-Math-7B/1762652579.841364",
- "retrieved_timestamp": "1762652579.841364",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2-Math-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2-Math-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2687048143370701
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.386954741074792
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24773413897280966
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35933333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1196808510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-0.5B/c8110747-f2dd-46d0-b2b3-706d70e1d714.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-0.5B/c8110747-f2dd-46d0-b2b3-706d70e1d714.json
deleted file mode 100644
index c93fe847112db9442f5a4d9d9336e6fd9534812d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-0.5B/c8110747-f2dd-46d0-b2b3-706d70e1d714.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-0.5B/1762652579.841982",
- "retrieved_timestamp": "1762652579.841983",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-0.5B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-0.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16271714606133947
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32748148151196615
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03927492447129909
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24664429530201343
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3433333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19057513297872342
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.5
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-1.5B/9982c576-75fd-47f6-8fe9-52b56fc58d3f.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-1.5B/9982c576-75fd-47f6-8fe9-52b56fc58d3f.json
deleted file mode 100644
index 41839d9fb82ca335770bfc9cc778c0f3adab0059..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-1.5B/9982c576-75fd-47f6-8fe9-52b56fc58d3f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-1.5B/1762652579.8426108",
- "retrieved_timestamp": "1762652579.842612",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-1.5B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-1.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26743041795768563
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40779509451366147
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09138972809667674
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35759375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28548869680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.5
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-14B/b02dabaf-2aac-468d-b0cc-c7194c2094fd.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-14B/b02dabaf-2aac-468d-b0cc-c7194c2094fd.json
deleted file mode 100644
index dc97cd17878b7b7f4d10309d78c273bf88476edc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-14B/b02dabaf-2aac-468d-b0cc-c7194c2094fd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-14B/1762652579.843051",
- "retrieved_timestamp": "1762652579.8430521",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3694464022127954
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.616051493531774
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29003021148036257
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38171140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4502395833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5248503989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-32B/9dd61039-27d0-42f3-9b03-65b0a59465d4.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-32B/9dd61039-27d0-42f3-9b03-65b0a59465d4.json
deleted file mode 100644
index a5e6dabfc7f2f380837adee7b322537f4bb7d71e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-32B/9dd61039-27d0-42f3-9b03-65b0a59465d4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-32B/1762652579.843701",
- "retrieved_timestamp": "1762652579.843702",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-32B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-32B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40766499554515356
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6770522448726507
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3564954682779456
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41191275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49783333333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5805352393617021
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-3B/43062e28-5532-4e31-ac49-fbd794c7f664.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-3B/43062e28-5532-4e31-ac49-fbd794c7f664.json
deleted file mode 100644
index 08e0a42e327034da3fee59ef4b52f6cb519878ac..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-3B/43062e28-5532-4e31-ac49-fbd794c7f664.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-3B/1762652579.8441322",
- "retrieved_timestamp": "1762652579.8441331",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-3B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2689541527591236
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4612475341011634
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14803625377643503
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4303333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3203125
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-72B/89ce1911-289d-40bb-be48-f9a4d8d73ac2.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-72B/89ce1911-289d-40bb-be48-f9a4d8d73ac2.json
deleted file mode 100644
index 2529499f5052322c08fb97dcd290a1d4f4d5b7ad..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-72B/89ce1911-289d-40bb-be48-f9a4d8d73ac2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-72B/1762652579.844565",
- "retrieved_timestamp": "1762652579.844566",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-72B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-72B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4137100670664947
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6797320670694852
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39123867069486407
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4052013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.477125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5968251329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-7B/bed92e1c-8f11-4f70-826e-569aa55baa09.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-7B/bed92e1c-8f11-4f70-826e-569aa55baa09.json
deleted file mode 100644
index 1de74221afd75c93eef4897f590d34affa9cabae..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-7B/bed92e1c-8f11-4f70-826e-569aa55baa09.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-7B/1762652579.8449879",
- "retrieved_timestamp": "1762652579.8449888",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3374479713825982
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5416303767788616
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25075528700906347
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4424270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4365026595744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Coder-14B/d0ae041c-8b56-4ce1-841b-96622a724894.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Coder-14B/d0ae041c-8b56-4ce1-841b-96622a724894.json
deleted file mode 100644
index db4ce938c45b7ecdf0c9feec74ab07ebcb64ad23..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Coder-14B/d0ae041c-8b56-4ce1-841b-96622a724894.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-14B/1762652579.8457868",
- "retrieved_timestamp": "1762652579.845789",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-Coder-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-Coder-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3472652561869174
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5864860091741232
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22507552870090636
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3873645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4521276595744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Coder-32B/743c517a-ad0f-495d-b9d0-cdca01335933.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Coder-32B/743c517a-ad0f-495d-b9d0-cdca01335933.json
deleted file mode 100644
index ae46c4b9c63a6b18f6d59da1c0f7d8a150882d1c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Coder-32B/743c517a-ad0f-495d-b9d0-cdca01335933.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-32B/1762652579.846424",
- "retrieved_timestamp": "1762652579.846425",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-Coder-32B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-Coder-32B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4363411304228336
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.640395506550809
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30891238670694865
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3464765100671141
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4528125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5302526595744681
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Coder-7B/5e82cb32-8291-497b-ac56-16b50947d1bf.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Coder-7B/5e82cb32-8291-497b-ac56-16b50947d1bf.json
deleted file mode 100644
index 13a7a94b0c6ed039295f5765dcfda8a122e0ca60..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Coder-7B/5e82cb32-8291-497b-ac56-16b50947d1bf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Coder-7B/1762652579.846894",
- "retrieved_timestamp": "1762652579.8468952",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-Coder-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-Coder-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.344592348302504
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48556405534214747
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19184290030211482
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3448541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3679355053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Math-7B/8fddcebe-58d2-4d40-8147-f02feabc0d9c.json b/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Math-7B/8fddcebe-58d2-4d40-8147-f02feabc0d9c.json
deleted file mode 100644
index 87c4a63febf719d3870050fb0a4e6c7d74842f8b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Qwen_Qwen2.5-Math-7B/8fddcebe-58d2-4d40-8147-f02feabc0d9c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Qwen_Qwen2.5-Math-7B/1762652579.8480499",
- "retrieved_timestamp": "1762652579.848052",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Qwen/Qwen2.5-Math-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Qwen/Qwen2.5-Math-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24599839536873275
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4454639372840941
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30513595166163143
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37809374999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27177526595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/RESMPDEV_EVA-Qwen2.5-1.5B-FRFR/648e69e2-54de-43c4-93ac-f8422fa4b9c1.json b/leaderboard_data/HFOpenLLMv2/alibaba/RESMPDEV_EVA-Qwen2.5-1.5B-FRFR/648e69e2-54de-43c4-93ac-f8422fa4b9c1.json
deleted file mode 100644
index 85bbbdfcd01fa453b44fcdbd1044c0fb73e493ce..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/RESMPDEV_EVA-Qwen2.5-1.5B-FRFR/648e69e2-54de-43c4-93ac-f8422fa4b9c1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/RESMPDEV_EVA-Qwen2.5-1.5B-FRFR/1762652579.848896",
- "retrieved_timestamp": "1762652579.848896",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "RESMPDEV/EVA-Qwen2.5-1.5B-FRFR",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "RESMPDEV/EVA-Qwen2.5-1.5B-FRFR"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.308172316121225
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3932411333682871
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1027190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3539375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27701130319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/RESMPDEV_Qwen2-Wukong-0.5B/72a11594-1d83-4e12-b82f-137b6749f5ab.json b/leaderboard_data/HFOpenLLMv2/alibaba/RESMPDEV_Qwen2-Wukong-0.5B/72a11594-1d83-4e12-b82f-137b6749f5ab.json
deleted file mode 100644
index f547bc17029e71c8feb4bb2ea2bde7d4b3d14219..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/RESMPDEV_Qwen2-Wukong-0.5B/72a11594-1d83-4e12-b82f-137b6749f5ab.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/RESMPDEV_Qwen2-Wukong-0.5B/1762652579.849144",
- "retrieved_timestamp": "1762652579.849144",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "RESMPDEV/Qwen2-Wukong-0.5B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "RESMPDEV/Qwen2-Wukong-0.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1854235650296768
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.308451428837168
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0015105740181268882
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23657718120805368
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3524791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13272938829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-Coder-Qwen2-1.5b/1ff6b76b-7241-4f06-9db5-4594d3ff7a3f.json b/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-Coder-Qwen2-1.5b/1ff6b76b-7241-4f06-9db5-4594d3ff7a3f.json
deleted file mode 100644
index 8b3f7e8d85b43cc1a8deb83b392a547c9c20d59b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-Coder-Qwen2-1.5b/1ff6b76b-7241-4f06-9db5-4594d3ff7a3f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-Coder-Qwen2-1.5b/1762652579.852138",
- "retrieved_timestamp": "1762652579.852139",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Replete-AI/Replete-Coder-Qwen2-1.5b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Replete-AI/Replete-Coder-Qwen2-1.5b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30142798884736943
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34747295666696026
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03851963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4072708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21467752659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-LLM-Qwen2-7b/20a6e090-2c78-4eb9-870e-9abbcbada6f9.json b/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-LLM-Qwen2-7b/20a6e090-2c78-4eb9-870e-9abbcbada6f9.json
deleted file mode 100644
index 75f4996fe9c1fecbffe305dc34a762a56b7e8f51..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-LLM-Qwen2-7b/20a6e090-2c78-4eb9-870e-9abbcbada6f9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-LLM-Qwen2-7b/1762652579.852611",
- "retrieved_timestamp": "1762652579.852612",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Replete-AI/Replete-LLM-Qwen2-7b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Replete-AI/Replete-LLM-Qwen2-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09324813716494457
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2976924067792704
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24748322147651006
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39409374999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11569148936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-LLM-Qwen2-7b/a846978d-de78-48e8-a738-54c732e50c28.json b/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-LLM-Qwen2-7b/a846978d-de78-48e8-a738-54c732e50c28.json
deleted file mode 100644
index 69b71649b30681745e57709c80259371e8690e92..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-LLM-Qwen2-7b/a846978d-de78-48e8-a738-54c732e50c28.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-LLM-Qwen2-7b/1762652579.8524",
- "retrieved_timestamp": "1762652579.8524008",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Replete-AI/Replete-LLM-Qwen2-7b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Replete-AI/Replete-LLM-Qwen2-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09047549391170981
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29852574011260374
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38476041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1157746010638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-LLM-Qwen2-7b_Beta-Preview/4977e0d5-1446-41ba-b00b-e8236c896d2e.json b/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-LLM-Qwen2-7b_Beta-Preview/4977e0d5-1446-41ba-b00b-e8236c896d2e.json
deleted file mode 100644
index 7fc8bd94e8efbdad4bf7834f87e1ebde8f0d23bc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Replete-AI_Replete-LLM-Qwen2-7b_Beta-Preview/4977e0d5-1446-41ba-b00b-e8236c896d2e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Replete-AI_Replete-LLM-Qwen2-7b_Beta-Preview/1762652579.852791",
- "retrieved_timestamp": "1762652579.852791",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Replete-AI/Replete-LLM-Qwen2-7b_Beta-Preview"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08575468645416384
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2929321328066677
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2483221476510067
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3980625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1284906914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Rombo-Org_Rombo-LLM-V2.5-Qwen-7b/8713e6fb-8843-43f2-af3b-57a59d326670.json b/leaderboard_data/HFOpenLLMv2/alibaba/Rombo-Org_Rombo-LLM-V2.5-Qwen-7b/8713e6fb-8843-43f2-af3b-57a59d326670.json
deleted file mode 100644
index 0a9bc0f5a1c7467644a108859c397f2324ed0442..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Rombo-Org_Rombo-LLM-V2.5-Qwen-7b/8713e6fb-8843-43f2-af3b-57a59d326670.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Rombo-Org_Rombo-LLM-V2.5-Qwen-7b/1762652579.854495",
- "retrieved_timestamp": "1762652579.854495",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Rombo-Org/Rombo-LLM-V2.5-Qwen-7b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Rombo-Org/Rombo-LLM-V2.5-Qwen-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.748183708116686
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5399745025607596
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.506797583081571
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39803125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4282746010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Sakalti_QwenTest-7/2d99163e-9ebd-49d9-ad13-ee1f780d277c.json b/leaderboard_data/HFOpenLLMv2/alibaba/Sakalti_QwenTest-7/2d99163e-9ebd-49d9-ad13-ee1f780d277c.json
deleted file mode 100644
index 1c770c304923329889a7db1c4877c37363742f37..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Sakalti_QwenTest-7/2d99163e-9ebd-49d9-ad13-ee1f780d277c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_QwenTest-7/1762652579.8585348",
- "retrieved_timestamp": "1762652579.858536",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/QwenTest-7",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Sakalti/QwenTest-7"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16718861509683197
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3063209532879154
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0037764350453172208
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34218750000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12117686170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.988
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Sakalti_qwen2.5-2.3B/6dc5b101-c681-4010-941a-3983cb9eff53.json b/leaderboard_data/HFOpenLLMv2/alibaba/Sakalti_qwen2.5-2.3B/6dc5b101-c681-4010-941a-3983cb9eff53.json
deleted file mode 100644
index 8d836bb50d09e1e3856543ebac0274feed63781e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Sakalti_qwen2.5-2.3B/6dc5b101-c681-4010-941a-3983cb9eff53.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sakalti_qwen2.5-2.3B/1762652579.869403",
- "retrieved_timestamp": "1762652579.8694038",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sakalti/qwen2.5-2.3B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Sakalti/qwen2.5-2.3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12879493078365403
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2849449123234445
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.005287009063444109
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2516778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38565625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11727061170212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2Model",
- "params_billions": 2.339
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Impish_QWEN_14B-1M/a059e151-6f32-48ff-900b-4e232aef3cc0.json b/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Impish_QWEN_14B-1M/a059e151-6f32-48ff-900b-4e232aef3cc0.json
deleted file mode 100644
index 3431e55f2d551f88cb9090bf3bddde8827ce1d43..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Impish_QWEN_14B-1M/a059e151-6f32-48ff-900b-4e232aef3cc0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_QWEN_14B-1M/1762652579.8825831",
- "retrieved_timestamp": "1762652579.882584",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SicariusSicariiStuff/Impish_QWEN_14B-1M",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "SicariusSicariiStuff/Impish_QWEN_14B-1M"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7867768631675067
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6282934814011238
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39652567975830816
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35067114093959734
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46146875000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.504404920212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Impish_QWEN_7B-1M/64c02fd8-386d-4b4c-bc00-d243cfcae7f1.json b/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Impish_QWEN_7B-1M/64c02fd8-386d-4b4c-bc00-d243cfcae7f1.json
deleted file mode 100644
index f87cd61b6a4666b56ec2b1f0610a249f689f8069..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Impish_QWEN_7B-1M/64c02fd8-386d-4b4c-bc00-d243cfcae7f1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Impish_QWEN_7B-1M/1762652579.8828428",
- "retrieved_timestamp": "1762652579.882844",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SicariusSicariiStuff/Impish_QWEN_7B-1M",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "SicariusSicariiStuff/Impish_QWEN_7B-1M"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6381744881359238
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.537172912933626
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30891238670694865
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40739583333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4265292553191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Qwen2.5-14B_Uncencored/7c6f4fa2-6847-4f57-8a8f-31673bd8b1e7.json b/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Qwen2.5-14B_Uncencored/7c6f4fa2-6847-4f57-8a8f-31673bd8b1e7.json
deleted file mode 100644
index 171ef49447290f41bcd1e7bfc8cbdb85feb50ade..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Qwen2.5-14B_Uncencored/7c6f4fa2-6847-4f57-8a8f-31673bd8b1e7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Qwen2.5-14B_Uncencored/1762652579.883748",
- "retrieved_timestamp": "1762652579.883749",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SicariusSicariiStuff/Qwen2.5-14B_Uncencored",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncencored"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31579099012841483
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6308941945507827
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31797583081570996
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38171140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45166666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.526595744680851
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Qwen2.5-14B_Uncensored/ea18a046-87bb-42d9-a1b2-d01fe875c970.json b/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Qwen2.5-14B_Uncensored/ea18a046-87bb-42d9-a1b2-d01fe875c970.json
deleted file mode 100644
index 6187f4b33568e3764766dea2f9d3e270d63275b1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Qwen2.5-14B_Uncensored/ea18a046-87bb-42d9-a1b2-d01fe875c970.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Qwen2.5-14B_Uncensored/1762652579.883949",
- "retrieved_timestamp": "1762652579.88395",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3173147249298528
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6308941945507827
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31797583081570996
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38171140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45166666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.526595744680851
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Qwen2.5-14B_Uncensored_Instruct/8012de5a-8cb0-4039-895f-70c20e9237ee.json b/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Qwen2.5-14B_Uncensored_Instruct/8012de5a-8cb0-4039-895f-70c20e9237ee.json
deleted file mode 100644
index efe8de8bf6792e56afc7e5e426a47eb7b0d8c533..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/SicariusSicariiStuff_Qwen2.5-14B_Uncensored_Instruct/8012de5a-8cb0-4039-895f-70c20e9237ee.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SicariusSicariiStuff_Qwen2.5-14B_Uncensored_Instruct/1762652579.884166",
- "retrieved_timestamp": "1762652579.884167",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "SicariusSicariiStuff/Qwen2.5-14B_Uncensored_Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3789389929830627
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5936792404117958
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3285498489425982
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3296979865771812
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36965625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5127160904255319
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/StelleX_Qwen2.5_Math_7B_Cot/a0802c61-1314-4a46-9b61-7a89246bac42.json b/leaderboard_data/HFOpenLLMv2/alibaba/StelleX_Qwen2.5_Math_7B_Cot/a0802c61-1314-4a46-9b61-7a89246bac42.json
deleted file mode 100644
index 3fb322cdd9d65b78208932642fd1ae6cf47309cf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/StelleX_Qwen2.5_Math_7B_Cot/a0802c61-1314-4a46-9b61-7a89246bac42.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/StelleX_Qwen2.5_Math_7B_Cot/1762652579.8928509",
- "retrieved_timestamp": "1762652579.892852",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "StelleX/Qwen2.5_Math_7B_Cot",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "StelleX/Qwen2.5_Math_7B_Cot"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2142747908881767
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4312922433417096
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32628398791540786
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39241666666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.281000664893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/T145_qwen-2.5-3B-merge-test/071d7565-90e5-43e8-a158-ab333beacdcf.json b/leaderboard_data/HFOpenLLMv2/alibaba/T145_qwen-2.5-3B-merge-test/071d7565-90e5-43e8-a158-ab333beacdcf.json
deleted file mode 100644
index 3c58ab0622bc6193593c22771662c832a0e405dc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/T145_qwen-2.5-3B-merge-test/071d7565-90e5-43e8-a158-ab333beacdcf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/T145_qwen-2.5-3B-merge-test/1762652579.908712",
- "retrieved_timestamp": "1762652579.9087129",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "T145/qwen-2.5-3B-merge-test",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "T145/qwen-2.5-3B-merge-test"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5751018408932742
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4842488747720393
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3202416918429003
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40072916666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3289561170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_AceCoder-Qwen2.5-7B-Ins-Rule/7621e05b-1b5e-43e5-a65c-322334575e68.json b/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_AceCoder-Qwen2.5-7B-Ins-Rule/7621e05b-1b5e-43e5-a65c-322334575e68.json
deleted file mode 100644
index c272d8b61859419ba8c472ffe964ef6838982547..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_AceCoder-Qwen2.5-7B-Ins-Rule/7621e05b-1b5e-43e5-a65c-322334575e68.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCoder-Qwen2.5-7B-Ins-Rule/1762652579.910362",
- "retrieved_timestamp": "1762652579.910363",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "TIGER-Lab/AceCoder-Qwen2.5-7B-Ins-Rule"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.742413462944986
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5404426673547671
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49924471299093653
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39803125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4321808510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Base-Rule/f6223009-028e-4063-90ce-e008a3b5b284.json b/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Base-Rule/f6223009-028e-4063-90ce-e008a3b5b284.json
deleted file mode 100644
index 807348eeb5f9d8054485734b1c12ef499e65abd1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Base-Rule/f6223009-028e-4063-90ce-e008a3b5b284.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Base-Rule/1762652579.910613",
- "retrieved_timestamp": "1762652579.910613",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Base-Rule"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44076273177391545
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49023782785253694
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20166163141993956
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34488541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37450132978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Ins-Rule/f75e2bca-e300-4b3c-a5aa-f6aae03e7330.json b/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Ins-Rule/f75e2bca-e300-4b3c-a5aa-f6aae03e7330.json
deleted file mode 100644
index 413f80bc64aaa7393211f590bd89cab1bcafd317..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Ins-Rule/f75e2bca-e300-4b3c-a5aa-f6aae03e7330.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TIGER-Lab_AceCoder-Qwen2.5-Coder-7B-Ins-Rule/1762652579.910825",
- "retrieved_timestamp": "1762652579.910826",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "TIGER-Lab/AceCoder-Qwen2.5-Coder-7B-Ins-Rule"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6222378843690297
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5089236146835355
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36027190332326287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40463541666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34283577127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_Qwen2.5-Math-7B-CFT/07e72fc4-9c37-4a81-a788-8619035c66d3.json b/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_Qwen2.5-Math-7B-CFT/07e72fc4-9c37-4a81-a788-8619035c66d3.json
deleted file mode 100644
index 708353ab1fa30aaa6910c0383729fdc503feb459..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/TIGER-Lab_Qwen2.5-Math-7B-CFT/07e72fc4-9c37-4a81-a788-8619035c66d3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TIGER-Lab_Qwen2.5-Math-7B-CFT/1762652579.911227",
- "retrieved_timestamp": "1762652579.911228",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TIGER-Lab/Qwen2.5-Math-7B-CFT",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "TIGER-Lab/Qwen2.5-Math-7B-CFT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2776976200924658
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46369414980230833
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5574018126888217
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2860738255033557
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38866666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446476063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-r-v0.3/43b106fe-ff02-4cfe-956f-cfc9e272de78.json b/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-r-v0.3/43b106fe-ff02-4cfe-956f-cfc9e272de78.json
deleted file mode 100644
index f8ac7564e021e6b92edecebbd956e4764e041782..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-r-v0.3/43b106fe-ff02-4cfe-956f-cfc9e272de78.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-r-v0.3/1762652579.917092",
- "retrieved_timestamp": "1762652579.917093",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheTsar1209/qwen-carpmuscle-r-v0.3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "TheTsar1209/qwen-carpmuscle-r-v0.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44550902715904905
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6227124007872
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30060422960725075
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35067114093959734
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42776041666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5103058510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.1/ce9658b7-b457-4fb3-8fce-4173b5d93f2d.json b/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.1/ce9658b7-b457-4fb3-8fce-4173b5d93f2d.json
deleted file mode 100644
index 68496680371f6775a2878f55f75c866e997e5ff4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.1/ce9658b7-b457-4fb3-8fce-4173b5d93f2d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.1/1762652579.917331",
- "retrieved_timestamp": "1762652579.917332",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheTsar1209/qwen-carpmuscle-v0.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "TheTsar1209/qwen-carpmuscle-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5621628390448454
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.643430074129922
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2628398791540785
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34395973154362414
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41610416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.520029920212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.2/eed9909e-db3e-4d6a-8caa-3f208ace941d.json b/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.2/eed9909e-db3e-4d6a-8caa-3f208ace941d.json
deleted file mode 100644
index d401bf54d52db192f15bd2dc9684b3eb3751cc77..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.2/eed9909e-db3e-4d6a-8caa-3f208ace941d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.2/1762652579.917543",
- "retrieved_timestamp": "1762652579.917544",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheTsar1209/qwen-carpmuscle-v0.2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "TheTsar1209/qwen-carpmuscle-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5256929391791557
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6386922464145662
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28323262839879154
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35570469798657717
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43455208333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5147107712765957
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.3/f8aa8470-6803-458e-8207-b217969dd6f3.json b/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.3/f8aa8470-6803-458e-8207-b217969dd6f3.json
deleted file mode 100644
index 0083b6ac036520cacc5f5d3e8679d3113a2284a6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.3/f8aa8470-6803-458e-8207-b217969dd6f3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.3/1762652579.917758",
- "retrieved_timestamp": "1762652579.917759",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheTsar1209/qwen-carpmuscle-v0.3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "TheTsar1209/qwen-carpmuscle-v0.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4476322823441801
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6151533941210218
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31344410876132933
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3565436241610738
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4131875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5061502659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.4.1/c464e6b4-aa76-4b42-ab9b-71f193ec2a57.json b/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.4.1/c464e6b4-aa76-4b42-ab9b-71f193ec2a57.json
deleted file mode 100644
index 4f63f0f88a6f4279e24e89f5be8a145de9a0214b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.4.1/c464e6b4-aa76-4b42-ab9b-71f193ec2a57.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.4.1/1762652579.918201",
- "retrieved_timestamp": "1762652579.9182022",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheTsar1209/qwen-carpmuscle-v0.4.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "TheTsar1209/qwen-carpmuscle-v0.4.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7359938297051822
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6506533698399672
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27794561933534745
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34563758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44890625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5191156914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.4/90fe60dc-76dd-4e90-99b4-c16d026afcb5.json b/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.4/90fe60dc-76dd-4e90-99b4-c16d026afcb5.json
deleted file mode 100644
index 1310d5effa5f77693c566336a1c70adb91743b9b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/TheTsar1209_qwen-carpmuscle-v0.4/90fe60dc-76dd-4e90-99b4-c16d026afcb5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheTsar1209_qwen-carpmuscle-v0.4/1762652579.917984",
- "retrieved_timestamp": "1762652579.917985",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheTsar1209/qwen-carpmuscle-v0.4",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "TheTsar1209/qwen-carpmuscle-v0.4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7202068289915202
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6453667027727318
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.277190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3523489932885906
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45160416666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5143783244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Triangle104_DSR1-Distill-Qwen-7B-RP/856c2575-700c-4b00-8883-bcde8841e262.json b/leaderboard_data/HFOpenLLMv2/alibaba/Triangle104_DSR1-Distill-Qwen-7B-RP/856c2575-700c-4b00-8883-bcde8841e262.json
deleted file mode 100644
index 8a390e587151eddd8f31289ae322a827960aa9e7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Triangle104_DSR1-Distill-Qwen-7B-RP/856c2575-700c-4b00-8883-bcde8841e262.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_DSR1-Distill-Qwen-7B-RP/1762652579.923616",
- "retrieved_timestamp": "1762652579.923616",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/DSR1-Distill-Qwen-7B-RP",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Triangle104/DSR1-Distill-Qwen-7B-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36092900171544834
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4326490703099772
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48036253776435045
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40454166666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30277593085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Weyaxi_Einstein-v7-Qwen2-7B/b20c1304-d782-4d41-9c15-0091f9c914e4.json b/leaderboard_data/HFOpenLLMv2/alibaba/Weyaxi_Einstein-v7-Qwen2-7B/b20c1304-d782-4d41-9c15-0091f9c914e4.json
deleted file mode 100644
index 67dad616144b08aace5f4fe6d6d9fb6fdd6e8eab..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Weyaxi_Einstein-v7-Qwen2-7B/b20c1304-d782-4d41-9c15-0091f9c914e4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Weyaxi_Einstein-v7-Qwen2-7B/1762652579.949607",
- "retrieved_timestamp": "1762652579.949609",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Weyaxi/Einstein-v7-Qwen2-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Weyaxi/Einstein-v7-Qwen2-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4099633417111043
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5161472249498397
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19939577039274925
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43997916666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4095744680851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-7B-MS-Destroyer/c5d4bbfe-68a9-4808-ab2e-e92dd88ba06a.json b/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-7B-MS-Destroyer/c5d4bbfe-68a9-4808-ab2e-e92dd88ba06a.json
deleted file mode 100644
index 09e23dbd3297ca4d603d3be0fac90f9b89ef3373..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-7B-MS-Destroyer/c5d4bbfe-68a9-4808-ab2e-e92dd88ba06a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-7B-MS-Destroyer/1762652579.953399",
- "retrieved_timestamp": "1762652579.953399",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Xiaojian9992024/Qwen2.5-7B-MS-Destroyer",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Xiaojian9992024/Qwen2.5-7B-MS-Destroyer"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7295741964653786
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5469696828400438
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.459214501510574
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42702083333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4412400265957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview-v0.2/5cf588ed-fde6-4ee1-833e-a6743cc1834c.json b/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview-v0.2/5cf588ed-fde6-4ee1-833e-a6743cc1834c.json
deleted file mode 100644
index 02a76ad5e6a6060ab1c10f7478a09c1bd3c3230c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview-v0.2/5cf588ed-fde6-4ee1-833e-a6743cc1834c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview-v0.2/1762652579.953881",
- "retrieved_timestamp": "1762652579.9538822",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6701984068937087
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.537439126573433
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47205438066465255
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4467083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4370844414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview/97a591f9-2052-43b3-851d-ac73c793a000.json b/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview/97a591f9-2052-43b3-851d-ac73c793a000.json
deleted file mode 100644
index 847acae1d66e79f1d17c21e4a456c450f52f26d3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview/97a591f9-2052-43b3-851d-ac73c793a000.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-Dyanka-7B-Preview/1762652579.95366",
- "retrieved_timestamp": "1762652579.953661",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Xiaojian9992024/Qwen2.5-Dyanka-7B-Preview"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7640205765147586
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5543342320067098
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4879154078549849
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31711409395973156
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44807291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43758311170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-THREADRIPPER-Medium-Censored/89ca3fb4-eb53-422c-a4dd-029bd1fc7c37.json b/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-THREADRIPPER-Medium-Censored/89ca3fb4-eb53-422c-a4dd-029bd1fc7c37.json
deleted file mode 100644
index c22414580aa26ad39a71bec6d09348a560585bb2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-THREADRIPPER-Medium-Censored/89ca3fb4-eb53-422c-a4dd-029bd1fc7c37.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-THREADRIPPER-Medium-Censored/1762652579.95415",
- "retrieved_timestamp": "1762652579.954151",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Medium-Censored"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8112064876749248
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6431453053747279
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.533987915407855
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3347315436241611
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.414
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49285239361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/4fcdfdff-87be-47b0-93bb-b4bc0bb2499d.json b/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/4fcdfdff-87be-47b0-93bb-b4bc0bb2499d.json
deleted file mode 100644
index 4d7f76714861263c5d3dc262fa0377c0d130ba95..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/4fcdfdff-87be-47b0-93bb-b4bc0bb2499d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small-AnniversaryEdition/1762652579.954578",
- "retrieved_timestamp": "1762652579.954578",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small-AnniversaryEdition"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7403899431286763
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5465437953400678
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5075528700906344
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38069791666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4393284574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small/a55039b6-922f-4732-9feb-fa757f627ebd.json b/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small/a55039b6-922f-4732-9feb-fa757f627ebd.json
deleted file mode 100644
index 1ec554961f08ee2fc4014e172f8f3f46c659fd65..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small/a55039b6-922f-4732-9feb-fa757f627ebd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-THREADRIPPER-Small/1762652579.9543638",
- "retrieved_timestamp": "1762652579.954365",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Xiaojian9992024/Qwen2.5-THREADRIPPER-Small"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7689164749531243
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5489785469339065
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4735649546827795
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43492708333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4356715425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-Ultra-1.5B-25.02-Exp/ddfae432-5d3c-4c7e-bc7f-087cddea014f.json b/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-Ultra-1.5B-25.02-Exp/ddfae432-5d3c-4c7e-bc7f-087cddea014f.json
deleted file mode 100644
index 65fe9e05ce68559a9c7c9cc0b2b8a4164453e398..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/Xiaojian9992024_Qwen2.5-Ultra-1.5B-25.02-Exp/ddfae432-5d3c-4c7e-bc7f-087cddea014f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Xiaojian9992024_Qwen2.5-Ultra-1.5B-25.02-Exp/1762652579.954794",
- "retrieved_timestamp": "1762652579.9547951",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "Xiaojian9992024/Qwen2.5-Ultra-1.5B-25.02-Exp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4073403015111017
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40655813090204523
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3383125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26412898936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-1M-YOYO-V3/fdc183ed-50d6-40c3-8e7b-02a37fc42a00.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-1M-YOYO-V3/fdc183ed-50d6-40c3-8e7b-02a37fc42a00.json
deleted file mode 100644
index 01495428c32b988c00d142ca6e29335b57c7475d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-1M-YOYO-V3/fdc183ed-50d6-40c3-8e7b-02a37fc42a00.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-1M-YOYO-V3/1762652579.955529",
- "retrieved_timestamp": "1762652579.95553",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/Qwen2.5-14B-1M-YOYO-V3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/Qwen2.5-14B-1M-YOYO-V3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8398327548681941
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6448491305599157
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5354984894259819
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3288590604026846
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.414125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5206948138297872
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-0505/1835078d-7897-4517-9d7b-86a2285dfa27.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-0505/1835078d-7897-4517-9d7b-86a2285dfa27.json
deleted file mode 100644
index 6d02225a49b482cb5afa3020180c939f1c817076..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-0505/1835078d-7897-4517-9d7b-86a2285dfa27.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-0505/1762652579.9557781",
- "retrieved_timestamp": "1762652579.9557781",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/Qwen2.5-14B-YOYO-0505",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/Qwen2.5-14B-YOYO-0505"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5882912893345214
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6539239511887702
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4433534743202417
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3733221476510067
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47569791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5370678191489362
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-0510-v2/ad6edd05-e83f-4da3-b200-c1d972548e8b.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-0510-v2/ad6edd05-e83f-4da3-b200-c1d972548e8b.json
deleted file mode 100644
index e88aa2137c05ca625759ccfd26430402cc1fdd91..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-0510-v2/ad6edd05-e83f-4da3-b200-c1d972548e8b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-0510-v2/1762652579.955989",
- "retrieved_timestamp": "1762652579.955989",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/Qwen2.5-14B-YOYO-0510-v2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/Qwen2.5-14B-YOYO-0510-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.594710922574325
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6552826977321495
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44410876132930516
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38171140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47439583333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5380651595744681
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-0805/6d4ac88f-7a02-4f78-9990-6736972f43f7.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-0805/6d4ac88f-7a02-4f78-9990-6736972f43f7.json
deleted file mode 100644
index b27b7f7d15c1d7757744aa4036abf9ad0a076b86..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-0805/6d4ac88f-7a02-4f78-9990-6736972f43f7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-0805/1762652579.956195",
- "retrieved_timestamp": "1762652579.956195",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/Qwen2.5-14B-YOYO-0805",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/Qwen2.5-14B-YOYO-0805"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5882912893345214
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6539239511887702
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4433534743202417
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3733221476510067
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47569791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5370678191489362
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1005-v2/ed12a458-8c3b-4e08-a218-e94b4fdd89d8.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1005-v2/ed12a458-8c3b-4e08-a218-e94b4fdd89d8.json
deleted file mode 100644
index 705d263ea08ad6bed1e799601d74c0ae3c33940f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1005-v2/ed12a458-8c3b-4e08-a218-e94b4fdd89d8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1005-v2/1762652579.956619",
- "retrieved_timestamp": "1762652579.956619",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/Qwen2.5-14B-YOYO-1005-v2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/Qwen2.5-14B-YOYO-1005-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.595310442958018
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6551321410649699
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4433534743202417
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38422818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4730625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5371509308510638
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1005/29058700-6465-476d-b1c9-2bb89d70c52b.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1005/29058700-6465-476d-b1c9-2bb89d70c52b.json
deleted file mode 100644
index 483508307eaead2d207a5c166b5e233d4dcdd5e1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1005/29058700-6465-476d-b1c9-2bb89d70c52b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1005/1762652579.9563992",
- "retrieved_timestamp": "1762652579.9564002",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/Qwen2.5-14B-YOYO-1005",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/Qwen2.5-14B-YOYO-1005"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5971588717935079
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6542059787912534
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.452416918429003
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47303125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5382313829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1010-v2/2047ae80-fdc6-4e94-90e6-b3cac52d8c45.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1010-v2/2047ae80-fdc6-4e94-90e6-b3cac52d8c45.json
deleted file mode 100644
index df0124e2dfb95426833edd5fc94fb34864668804..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1010-v2/2047ae80-fdc6-4e94-90e6-b3cac52d8c45.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1010-v2/1762652579.957223",
- "retrieved_timestamp": "1762652579.957223",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/Qwen2.5-14B-YOYO-1010-v2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.594710922574325
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6552826977321495
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44410876132930516
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38171140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47439583333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5380651595744681
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1010/1de35d6f-c62f-48fd-b921-41e85b55434a.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1010/1de35d6f-c62f-48fd-b921-41e85b55434a.json
deleted file mode 100644
index 4bc2550507f5a48d0660b1d6950c7ec895a04f89..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1010/1de35d6f-c62f-48fd-b921-41e85b55434a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1010/1762652579.957045",
- "retrieved_timestamp": "1762652579.957045",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/Qwen2.5-14B-YOYO-1010",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7904737208384863
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6405986391086301
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4180625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49443151595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1010/6a676239-eed6-44dc-b395-1b2453d5b0ba.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1010/6a676239-eed6-44dc-b395-1b2453d5b0ba.json
deleted file mode 100644
index d967872c543905abe94973f96e3ad37c05f2c7d3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-1010/6a676239-eed6-44dc-b395-1b2453d5b0ba.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-1010/1762652579.956832",
- "retrieved_timestamp": "1762652579.956832",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/Qwen2.5-14B-YOYO-1010",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/Qwen2.5-14B-YOYO-1010"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5898648918203699
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6539973096042956
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4509063444108761
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38338926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47439583333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5375664893617021
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-SCE/e0545222-4bd1-490a-a315-5b9ce9742310.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-SCE/e0545222-4bd1-490a-a315-5b9ce9742310.json
deleted file mode 100644
index aba520b2f6032753ede02403498fc278940fed78..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-SCE/e0545222-4bd1-490a-a315-5b9ce9742310.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-SCE/1762652579.957431",
- "retrieved_timestamp": "1762652579.957431",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/Qwen2.5-14B-YOYO-SCE",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/Qwen2.5-14B-YOYO-SCE"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5843694729983111
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6489486805510399
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46148036253776437
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37416107382550334
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47042708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5380651595744681
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-V4-p1/441375d9-0375-4a15-9d50-267395d3ab13.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-V4-p1/441375d9-0375-4a15-9d50-267395d3ab13.json
deleted file mode 100644
index ba7018f285e7647d7f87ec9cabdb29e4a9c2af49..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-V4-p1/441375d9-0375-4a15-9d50-267395d3ab13.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-V4-p1/1762652579.957833",
- "retrieved_timestamp": "1762652579.957834",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8203488964835526
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6515535751177631
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5332326283987915
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34563758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41942708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5019946808510638
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-V4-p2/9ecdd8a3-247b-46b2-ae3b-5798685329ef.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-V4-p2/9ecdd8a3-247b-46b2-ae3b-5798685329ef.json
deleted file mode 100644
index 3410614958480a1d6310c0e93520c28703761873..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-V4-p2/9ecdd8a3-247b-46b2-ae3b-5798685329ef.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-V4-p2/1762652579.958032",
- "retrieved_timestamp": "1762652579.9580328",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4-p2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8047868544351211
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6338919627514907
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5166163141993958
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3271812080536913
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44345833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49675864361702127
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-V4/c76d318b-eba5-4407-be86-a92051791f00.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-V4/c76d318b-eba5-4407-be86-a92051791f00.json
deleted file mode 100644
index b093741219937e3aa704026a14a273cec6b8b815..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-V4/c76d318b-eba5-4407-be86-a92051791f00.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-V4/1762652579.9576309",
- "retrieved_timestamp": "1762652579.957632",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/Qwen2.5-14B-YOYO-V4",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/Qwen2.5-14B-YOYO-V4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8397828871837835
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6490345839036636
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5347432024169184
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3221476510067114
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41152083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5169547872340425
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-latest-V2/b97b327c-1730-4bfe-b5fe-00dbfcd0d372.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-latest-V2/b97b327c-1730-4bfe-b5fe-00dbfcd0d372.json
deleted file mode 100644
index aea1891d1fadad7042c891a8b2cc67fc0ba8e3dc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-latest-V2/b97b327c-1730-4bfe-b5fe-00dbfcd0d372.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-latest-V2/1762652579.958441",
- "retrieved_timestamp": "1762652579.958441",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/Qwen2.5-14B-YOYO-latest-V2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/Qwen2.5-14B-YOYO-latest-V2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7771346693440072
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6299023045601466
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5158610271903323
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3540268456375839
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42993750000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5223570478723404
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-latest/d5487f61-9be7-4ffc-af6d-be9f925dd4ba.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-latest/d5487f61-9be7-4ffc-af6d-be9f925dd4ba.json
deleted file mode 100644
index a4b69be40001cfa9318485e8ab5e520c6e9722cb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-YOYO-latest/d5487f61-9be7-4ffc-af6d-be9f925dd4ba.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-YOYO-latest/1762652579.95823",
- "retrieved_timestamp": "1762652579.958231",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/Qwen2.5-14B-YOYO-latest",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/Qwen2.5-14B-YOYO-latest"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.591063932587756
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6656232526900528
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4418429003021148
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3825503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.469125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5370678191489362
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-it-restore/ab78a98d-0cad-4215-8f37-f3093066a98d.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-it-restore/ab78a98d-0cad-4215-8f37-f3093066a98d.json
deleted file mode 100644
index 6ab667c679944d0d966151bcf1110c9205b90ed0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-14B-it-restore/ab78a98d-0cad-4215-8f37-f3093066a98d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-14B-it-restore/1762652579.958646",
- "retrieved_timestamp": "1762652579.958647",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/Qwen2.5-14B-it-restore",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/Qwen2.5-14B-it-restore"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8209484168672456
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6387730309916794
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5370090634441088
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.337248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40872916666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4900265957446808
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-7B-it-restore/2f2577b8-28e3-4fa1-8e65-66e59499b9cd.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-7B-it-restore/2f2577b8-28e3-4fa1-8e65-66e59499b9cd.json
deleted file mode 100644
index df9aa3d9429a23f8fc1626065512cf3be3ab93c7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-7B-it-restore/2f2577b8-28e3-4fa1-8e65-66e59499b9cd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-7B-it-restore/1762652579.958842",
- "retrieved_timestamp": "1762652579.958842",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/Qwen2.5-7B-it-restore",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/Qwen2.5-7B-it-restore"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7530796065550517
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5406524352251431
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40069791666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42877327127659576
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-Coder-14B-YOYO-1010/4f6bda51-89d3-4005-9133-db6d871ae87d.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-Coder-14B-YOYO-1010/4f6bda51-89d3-4005-9133-db6d871ae87d.json
deleted file mode 100644
index 8909b69eae98031b962c12dcfd5c3e5c9f454e2c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_Qwen2.5-Coder-14B-YOYO-1010/4f6bda51-89d3-4005-9133-db6d871ae87d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_Qwen2.5-Coder-14B-YOYO-1010/1762652579.9590368",
- "retrieved_timestamp": "1762652579.959038",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/Qwen2.5-Coder-14B-YOYO-1010"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5335864395359867
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6186663964199025
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3217522658610272
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3523489932885906
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4422395833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4074966755319149
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V2/0c7e0639-a082-47f1-bf32-0c45ce573f0a.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V2/0c7e0639-a082-47f1-bf32-0c45ce573f0a.json
deleted file mode 100644
index 00317316d1d63101b4b2060f34daa2efc5ae87cf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V2/0c7e0639-a082-47f1-bf32-0c45ce573f0a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V2/1762652579.959567",
- "retrieved_timestamp": "1762652579.9595678",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5070834275278483
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6452083564140533
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3542296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37919463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46890625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5371509308510638
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V3/4f85534a-0b12-42c4-a0d3-06d4d8337e0c.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V3/4f85534a-0b12-42c4-a0d3-06d4d8337e0c.json
deleted file mode 100644
index 12f54f45d8aceb80bbea388cc508562eb004c1c1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V3/4f85534a-0b12-42c4-a0d3-06d4d8337e0c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V3/1762652579.959789",
- "retrieved_timestamp": "1762652579.959789",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8577928784513978
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6359248665982408
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.527190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33221476510067116
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40215625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4881150265957447
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V4/f5b253b5-4c42-49f8-9f3f-d85a5b2502c0.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V4/f5b253b5-4c42-49f8-9f3f-d85a5b2502c0.json
deleted file mode 100644
index 4a8dbcfe4daee43f0c5dc7fe96bbf6ac1a7ae102..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V4/f5b253b5-4c42-49f8-9f3f-d85a5b2502c0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B-V4/1762652579.959998",
- "retrieved_timestamp": "1762652579.959999",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B-V4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8364605912312664
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.651497220848125
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5392749244712991
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145973154362416
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44342708333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5203623670212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B/2dd14fef-53f5-491d-a5e1-7e19f6043049.json b/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B/2dd14fef-53f5-491d-a5e1-7e19f6043049.json
deleted file mode 100644
index 5350741596b01a6b031e48662c40d38ce1ab4a3e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YOYO-AI_ZYH-LLM-Qwen2.5-14B/2dd14fef-53f5-491d-a5e1-7e19f6043049.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YOYO-AI_ZYH-LLM-Qwen2.5-14B/1762652579.959276",
- "retrieved_timestamp": "1762652579.9592772",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YOYO-AI/ZYH-LLM-Qwen2.5-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YOYO-AI/ZYH-LLM-Qwen2.5-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.594111402190632
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6644460038734455
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.411631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3859060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47569791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5350731382978723
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/YoungPanda_qwenqwen/7e4c528f-bb42-40e7-b849-86732d2f2a18.json b/leaderboard_data/HFOpenLLMv2/alibaba/YoungPanda_qwenqwen/7e4c528f-bb42-40e7-b849-86732d2f2a18.json
deleted file mode 100644
index 70cd583a8b5ef494528a21f61f90487fbcf06610..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/YoungPanda_qwenqwen/7e4c528f-bb42-40e7-b849-86732d2f2a18.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/YoungPanda_qwenqwen/1762652579.964632",
- "retrieved_timestamp": "1762652579.964633",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "YoungPanda/qwenqwen",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "YoungPanda/qwenqwen"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12639684924888184
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.337898518087465
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.035498489425981876
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34336458333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11677194148936171
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2MoeForCausalLM",
- "params_billions": 14.316
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen-2.5-Aether-SlerpFusion-7B/8b61e7aa-3ba3-4e25-b1bf-9718970a111a.json b/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen-2.5-Aether-SlerpFusion-7B/8b61e7aa-3ba3-4e25-b1bf-9718970a111a.json
deleted file mode 100644
index b25d9ffc3ba36958ae32b26676e4da2bda8fdbcc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen-2.5-Aether-SlerpFusion-7B/8b61e7aa-3ba3-4e25-b1bf-9718970a111a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen-2.5-Aether-SlerpFusion-7B/1762652579.9677062",
- "retrieved_timestamp": "1762652579.9677062",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "ZeroXClem/Qwen-2.5-Aether-SlerpFusion-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6261597007052399
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5462236205548866
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27341389728096677
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41778125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43267952127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-CelestialHarmony-1M/d912a685-7187-4b56-a7a8-881ed678ae2f.json b/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-CelestialHarmony-1M/d912a685-7187-4b56-a7a8-881ed678ae2f.json
deleted file mode 100644
index 48030382ee388ae717c3679d8aec0d7da239fe5a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-CelestialHarmony-1M/d912a685-7187-4b56-a7a8-881ed678ae2f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-CelestialHarmony-1M/1762652579.967964",
- "retrieved_timestamp": "1762652579.967965",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "ZeroXClem/Qwen2.5-7B-CelestialHarmony-1M"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5943862285402732
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5431374181474681
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3474320241691843
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3187919463087248
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4595416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4386635638297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-HomerAnvita-NerdMix/500a7a12-9c94-4ed8-b2b4-33473141c3c7.json b/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-HomerAnvita-NerdMix/500a7a12-9c94-4ed8-b2b4-33473141c3c7.json
deleted file mode 100644
index e4eb8094e0665070345c8c1d212711e7c959511a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-HomerAnvita-NerdMix/500a7a12-9c94-4ed8-b2b4-33473141c3c7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-HomerAnvita-NerdMix/1762652579.96818",
- "retrieved_timestamp": "1762652579.968181",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "ZeroXClem/Qwen2.5-7B-HomerAnvita-NerdMix"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7707649037886142
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5541319848156986
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38368580060422963
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43905208333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4431515957446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-HomerCreative-Mix/336aaa71-3f35-48f3-bede-cb9ab3324cfc.json b/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-HomerCreative-Mix/336aaa71-3f35-48f3-bede-cb9ab3324cfc.json
deleted file mode 100644
index 9c55a92b8c9fdd57ac2e90cce0a2ba173521fc2a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-HomerCreative-Mix/336aaa71-3f35-48f3-bede-cb9ab3324cfc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-HomerCreative-Mix/1762652579.968384",
- "retrieved_timestamp": "1762652579.968385",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ZeroXClem/Qwen2.5-7B-HomerCreative-Mix",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "ZeroXClem/Qwen2.5-7B-HomerCreative-Mix"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7835044348994002
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5548068560095062
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3564954682779456
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43495833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4447307180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-Qandora-CySec/7a495a80-f712-477b-bd5c-0cf7a07e8ef2.json b/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-Qandora-CySec/7a495a80-f712-477b-bd5c-0cf7a07e8ef2.json
deleted file mode 100644
index 19945da3ff36ddbdeb390c1cc002166d1820653e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/ZeroXClem_Qwen2.5-7B-Qandora-CySec/7a495a80-f712-477b-bd5c-0cf7a07e8ef2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ZeroXClem_Qwen2.5-7B-Qandora-CySec/1762652579.968593",
- "retrieved_timestamp": "1762652579.9685938",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ZeroXClem/Qwen2.5-7B-Qandora-CySec",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "ZeroXClem/Qwen2.5-7B-Qandora-CySec"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6773172958860268
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5490022663689288
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2930513595166163
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4286041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4484707446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/abacusai_Liberated-Qwen1.5-14B/614f3e27-e150-4edb-9438-06d0b0f38ca3.json b/leaderboard_data/HFOpenLLMv2/alibaba/abacusai_Liberated-Qwen1.5-14B/614f3e27-e150-4edb-9438-06d0b0f38ca3.json
deleted file mode 100644
index ee6f1d25906035003c6a7711b3de44fdb8fde4cf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/abacusai_Liberated-Qwen1.5-14B/614f3e27-e150-4edb-9438-06d0b0f38ca3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/abacusai_Liberated-Qwen1.5-14B/1762652579.9698281",
- "retrieved_timestamp": "1762652579.9698281",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "abacusai/Liberated-Qwen1.5-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "abacusai/Liberated-Qwen1.5-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36310212458499
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49480009174671863
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16012084592145015
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41746875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35123005319148937
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/adriszmar_QAIMath-Qwen2.5-7B-TIES/457f0bc3-68e1-4ecb-a983-5f504b1246cd.json b/leaderboard_data/HFOpenLLMv2/alibaba/adriszmar_QAIMath-Qwen2.5-7B-TIES/457f0bc3-68e1-4ecb-a983-5f504b1246cd.json
deleted file mode 100644
index 29956dc935e42b95316623db16a4a0e4e61b64c8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/adriszmar_QAIMath-Qwen2.5-7B-TIES/457f0bc3-68e1-4ecb-a983-5f504b1246cd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/adriszmar_QAIMath-Qwen2.5-7B-TIES/1762652579.975151",
- "retrieved_timestamp": "1762652579.975153",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "adriszmar/QAIMath-Qwen2.5-7B-TIES",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "adriszmar/QAIMath-Qwen2.5-7B-TIES"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16853725891745014
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31242688274884584
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0015105740181268882
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24916107382550334
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39629166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10663231382978723
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/adriszmar_QAIMath-Qwen2.5-7B-TIES/78544e05-7eed-465d-9199-35b25e1bebfe.json b/leaderboard_data/HFOpenLLMv2/alibaba/adriszmar_QAIMath-Qwen2.5-7B-TIES/78544e05-7eed-465d-9199-35b25e1bebfe.json
deleted file mode 100644
index c5126a5c1dc78a3ec63da3b597f98f6d5f83ed05..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/adriszmar_QAIMath-Qwen2.5-7B-TIES/78544e05-7eed-465d-9199-35b25e1bebfe.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/adriszmar_QAIMath-Qwen2.5-7B-TIES/1762652579.9747589",
- "retrieved_timestamp": "1762652579.9747598",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "adriszmar/QAIMath-Qwen2.5-7B-TIES",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "adriszmar/QAIMath-Qwen2.5-7B-TIES"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.174632198123202
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3126379538396578
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24496644295302014
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40959375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10871010638297872
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-42B-AGI/de6fe2ab-47de-4616-a0b9-b2cb6f44b16b.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-42B-AGI/de6fe2ab-47de-4616-a0b9-b2cb6f44b16b.json
deleted file mode 100644
index 857d3e4c4875a1b5380876196f2219ae6a6f26f9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-42B-AGI/de6fe2ab-47de-4616-a0b9-b2cb6f44b16b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-42B-AGI/1762652579.9983659",
- "retrieved_timestamp": "1762652579.998367",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Qwen2.5-42B-AGI",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/Qwen2.5-42B-AGI"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19129354557019818
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2942104150907988
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36203125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11677194148936171
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 42.516
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task2/3518e992-9548-4025-a641-99a2cf3833e4.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task2/3518e992-9548-4025-a641-99a2cf3833e4.json
deleted file mode 100644
index 7b705af0567fd1470ba073b14732eb7ceb881f24..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task2/3518e992-9548-4025-a641-99a2cf3833e4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task2/1762652579.998622",
- "retrieved_timestamp": "1762652579.998623",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Qwen2.5-7B-task2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/Qwen2.5-7B-task2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45270327176336567
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5625940266685543
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3549848942598187
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43696874999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4517121010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task3/0c556e08-bb71-406c-88b8-d45fc4cc43f0.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task3/0c556e08-bb71-406c-88b8-d45fc4cc43f0.json
deleted file mode 100644
index c4fa1605cf962cab28f00b180b789f1771fdb8fc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task3/0c556e08-bb71-406c-88b8-d45fc4cc43f0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task3/1762652579.998833",
- "retrieved_timestamp": "1762652579.998834",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Qwen2.5-7B-task3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/Qwen2.5-7B-task3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.512903540383959
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5397623813486384
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26057401812688824
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31711409395973156
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43557291666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45013297872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task4/a200d34f-8ed0-4f1d-93e2-cff38b1811f9.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task4/a200d34f-8ed0-4f1d-93e2-cff38b1811f9.json
deleted file mode 100644
index 7e270f5d2aa98a3912904bc61811a71864c0d9f7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task4/a200d34f-8ed0-4f1d-93e2-cff38b1811f9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task4/1762652579.999042",
- "retrieved_timestamp": "1762652579.999042",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Qwen2.5-7B-task4",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/Qwen2.5-7B-task4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5005385709916355
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5583446038580263
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311178247734139
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43954166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45611702127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task7/b5b02465-0d3f-4ccc-a104-174fcf53dc9a.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task7/b5b02465-0d3f-4ccc-a104-174fcf53dc9a.json
deleted file mode 100644
index cfff1e9e2998c037931ba53545ac0a496689dd12..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task7/b5b02465-0d3f-4ccc-a104-174fcf53dc9a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task7/1762652579.999242",
- "retrieved_timestamp": "1762652579.999243",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Qwen2.5-7B-task7",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/Qwen2.5-7B-task7"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42842325030917966
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.555243179835915
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0649546827794562
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4325625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4133144946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task8/956640e9-97a3-4641-9ed0-a63831a8ee58.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task8/956640e9-97a3-4641-9ed0-a63831a8ee58.json
deleted file mode 100644
index 8944fce29e71fee315e5340a5750e549c727fb2d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-7B-task8/956640e9-97a3-4641-9ed0-a63831a8ee58.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-7B-task8/1762652579.9994612",
- "retrieved_timestamp": "1762652579.999462",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Qwen2.5-7B-task8",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/Qwen2.5-7B-task8"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4645185884564068
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5524895381578828
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3527190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45144791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44331781914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-slerp-14B/ba80d36c-7688-40e8-8182-251c6b9e6b19.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-slerp-14B/ba80d36c-7688-40e8-8182-251c6b9e6b19.json
deleted file mode 100644
index 767c315318dfd708169142115b4ebd5d3e5e2666..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwen2.5-slerp-14B/ba80d36c-7688-40e8-8182-251c6b9e6b19.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Qwen2.5-slerp-14B/1762652579.999685",
- "retrieved_timestamp": "1762652579.999686",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Qwen2.5-slerp-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/Qwen2.5-slerp-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49282016161562425
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.65124197415124
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4622356495468278
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3674496644295302
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47439583333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5378989361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp12-7B/18c67de4-1518-44b6-b92f-b490e9d55877.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp12-7B/18c67de4-1518-44b6-b92f-b490e9d55877.json
deleted file mode 100644
index 4f2abde9964a5eaa0425835e0fea82540b093663..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp12-7B/18c67de4-1518-44b6-b92f-b490e9d55877.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp12-7B/1762652579.999902",
- "retrieved_timestamp": "1762652579.999903",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/QwenSlerp12-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/QwenSlerp12-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5075577246151324
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5556448443090559
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2945619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31543624161073824
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45947916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4460605053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp4-14B/1393cab1-31aa-470c-bca1-53f99d7ea1e8.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp4-14B/1393cab1-31aa-470c-bca1-53f99d7ea1e8.json
deleted file mode 100644
index ea61568c4befc2d79d95dae578ed2d2d7ee9a9c3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp4-14B/1393cab1-31aa-470c-bca1-53f99d7ea1e8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp4-14B/1762652580.000124",
- "retrieved_timestamp": "1762652580.000125",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/QwenSlerp4-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/QwenSlerp4-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6327544249258634
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6483250205703057
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3693353474320242
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3724832214765101
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46496875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5435505319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp5-14B/da7928ec-55b8-4d4b-9b9e-b40c5de7136b.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp5-14B/da7928ec-55b8-4d4b-9b9e-b40c5de7136b.json
deleted file mode 100644
index f8881ba9715e893a12486fc9abd66d2e662126ab..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp5-14B/da7928ec-55b8-4d4b-9b9e-b40c5de7136b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp5-14B/1762652580.000389",
- "retrieved_timestamp": "1762652580.0003898",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/QwenSlerp5-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/QwenSlerp5-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7119387669162267
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6356573710010681
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3564954682779456
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3649328859060403
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4675416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5390625
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp6-14B/5135513f-f255-412b-ab16-f0d613e4525e.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp6-14B/5135513f-f255-412b-ab16-f0d613e4525e.json
deleted file mode 100644
index fa75519e2a20690138410732f12ed2a21b7c392e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenSlerp6-14B/5135513f-f255-412b-ab16-f0d613e4525e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_QwenSlerp6-14B/1762652580.0006049",
- "retrieved_timestamp": "1762652580.000606",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/QwenSlerp6-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/QwenSlerp6-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6866846633598851
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6384454358065165
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3723564954682779
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3733221476510067
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46896875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5405585106382979
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenStock1-14B/95c86ae6-dcb7-4ed7-a82d-ce0b374cca0e.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenStock1-14B/95c86ae6-dcb7-4ed7-a82d-ce0b374cca0e.json
deleted file mode 100644
index 9f9cd83f52fde25f8fa78d2fae984cf4ac788f21..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenStock1-14B/95c86ae6-dcb7-4ed7-a82d-ce0b374cca0e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_QwenStock1-14B/1762652580.0008268",
- "retrieved_timestamp": "1762652580.0008278",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/QwenStock1-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/QwenStock1-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5634117474966422
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6528491305599156
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3768882175226586
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3766778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47296875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5418051861702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenStock2-14B/4a4c258b-2b03-4fad-a5e0-b623a25fb735.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenStock2-14B/4a4c258b-2b03-4fad-a5e0-b623a25fb735.json
deleted file mode 100644
index e1b39207a1a0f3b76e0d04124c2c17866fbde40e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenStock2-14B/4a4c258b-2b03-4fad-a5e0-b623a25fb735.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_QwenStock2-14B/1762652580.001041",
- "retrieved_timestamp": "1762652580.001042",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/QwenStock2-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/QwenStock2-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5563427261887348
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.656885010139055
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38821752265861026
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37919463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47560416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5405585106382979
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenStock3-14B/2b3928ad-ab69-4e63-aa3c-e64dea7b5e6c.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenStock3-14B/2b3928ad-ab69-4e63-aa3c-e64dea7b5e6c.json
deleted file mode 100644
index 25a231beb09fb7de10575252a8567dd12efa7ef0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_QwenStock3-14B/2b3928ad-ab69-4e63-aa3c-e64dea7b5e6c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_QwenStock3-14B/1762652580.0012438",
- "retrieved_timestamp": "1762652580.001245",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/QwenStock3-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/QwenStock3-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5615134509767417
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6565322062808641
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3776435045317221
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3783557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4755729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5428025265957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp2-14B/636ed71e-3d86-4d5d-8b8d-3019f26261fc.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp2-14B/636ed71e-3d86-4d5d-8b8d-3019f26261fc.json
deleted file mode 100644
index a3efa01640491d6360bb1e311f26de17f534943c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp2-14B/636ed71e-3d86-4d5d-8b8d-3019f26261fc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp2-14B/1762652580.001452",
- "retrieved_timestamp": "1762652580.0014532",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Qwenslerp2-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/Qwenslerp2-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5007136619724553
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6554876216007552
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44561933534743203
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36828859060402686
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4729375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5403091755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp2-7B/a1e6f539-f5d7-4f57-b0da-4df7e5a86240.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp2-7B/a1e6f539-f5d7-4f57-b0da-4df7e5a86240.json
deleted file mode 100644
index 87145601383c5c14956347e4dde724e4ec7d140f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp2-7B/a1e6f539-f5d7-4f57-b0da-4df7e5a86240.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp2-7B/1762652580.001649",
- "retrieved_timestamp": "1762652580.0016499",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Qwenslerp2-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/Qwenslerp2-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5294396645345462
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5609127334788001
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3421450151057402
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4356041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4515458776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp3-14B/06a2a807-3dbc-42c4-adec-4d6caa01cf74.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp3-14B/06a2a807-3dbc-42c4-adec-4d6caa01cf74.json
deleted file mode 100644
index 2a087c5084f8b6f2248acdf94c0e670ef85d14f8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp3-14B/06a2a807-3dbc-42c4-adec-4d6caa01cf74.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp3-14B/1762652580.001856",
- "retrieved_timestamp": "1762652580.001856",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Qwenslerp3-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/Qwenslerp3-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5052349986923584
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6520835120117142
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44637462235649544
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.375
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46760416666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5394780585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp3-7B/88727af1-7672-4ab5-9cc4-f56d286f3967.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp3-7B/88727af1-7672-4ab5-9cc4-f56d286f3967.json
deleted file mode 100644
index cc6eb877f8217f1f5217dce8f4d15477c7c09c7f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Qwenslerp3-7B/88727af1-7672-4ab5-9cc4-f56d286f3967.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Qwenslerp3-7B/1762652580.0020611",
- "retrieved_timestamp": "1762652580.002062",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Qwenslerp3-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/Qwenslerp3-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.501837347127843
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5580160200086862
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3217522658610272
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45151041666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45420545212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Rombos-LLM-V2.5-Qwen-42b/619fde94-d095-4f5c-b36d-19a38b6a8109.json b/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Rombos-LLM-V2.5-Qwen-42b/619fde94-d095-4f5c-b36d-19a38b6a8109.json
deleted file mode 100644
index ef86715ffa6154129fc30d6c9522e4fad3118e3f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/allknowingroger_Rombos-LLM-V2.5-Qwen-42b/619fde94-d095-4f5c-b36d-19a38b6a8109.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Rombos-LLM-V2.5-Qwen-42b/1762652580.002683",
- "retrieved_timestamp": "1762652580.002683",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Rombos-LLM-V2.5-Qwen-42b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "allknowingroger/Rombos-LLM-V2.5-Qwen-42b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1879213819332704
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2969164076001621
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36333333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11677194148936171
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 42.516
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Blunt/d75b9105-a60d-49d9-8606-7b23ff5d3d1a.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Blunt/d75b9105-a60d-49d9-8606-7b23ff5d3d1a.json
deleted file mode 100644
index 0cbe3bfeded6d2ff3c2cbd00d7870449e93f8d53..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Blunt/d75b9105-a60d-49d9-8606-7b23ff5d3d1a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Blunt/1762652580.03596",
- "retrieved_timestamp": "1762652580.0359628",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Blunt"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.261136008014291
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27743669901671336
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13821752265861026
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24748322147651006
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35952083333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11835106382978723
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Reflective/40933520-61e0-4cbe-b6b2-b4d19063a1b9.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Reflective/40933520-61e0-4cbe-b6b2-b4d19063a1b9.json
deleted file mode 100644
index 21a23904e15d84bb78da95fe352beb7912267319..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Reflective/40933520-61e0-4cbe-b6b2-b4d19063a1b9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-1.5B-Reflective/1762652580.0363572",
- "retrieved_timestamp": "1762652580.0363579",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "braindao/DeepSeek-R1-Distill-Qwen-1.5B-Reflective"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30327641768285923
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2908444769655102
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16314199395770393
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33555208333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11303191489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/46a36382-df06-4dc1-93ae-6ae61343a969.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/46a36382-df06-4dc1-93ae-6ae61343a969.json
deleted file mode 100644
index 1365fab3ddf472888c0ec84ac6b922cdaad9ef67..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/46a36382-df06-4dc1-93ae-6ae61343a969.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-ABUB-ST/1762652580.036823",
- "retrieved_timestamp": "1762652580.036824",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-ABUB-ST"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3751922676276723
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4926903187457697
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5015105740181269
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3447986577181208
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4220625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42428523936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/269f307e-3af1-47a2-92ec-00a59b4725ac.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/269f307e-3af1-47a2-92ec-00a59b4725ac.json
deleted file mode 100644
index 4e17e6d84613df2c669d2cf0b0f1e9d95288f1a5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/269f307e-3af1-47a2-92ec-00a59b4725ac.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective/1762652580.03794",
- "retrieved_timestamp": "1762652580.037941",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt-Reflective"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.554044380022784
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.337106084887115
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23716012084592145
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4247604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15043218085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/244417b6-88a2-483f-adba-c1d944c9cc29.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/244417b6-88a2-483f-adba-c1d944c9cc29.json
deleted file mode 100644
index 869bb2c52915094c2cf55874de50d7cdddca568f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/244417b6-88a2-483f-adba-c1d944c9cc29.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt/1762652580.037686",
- "retrieved_timestamp": "1762652580.037687",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Blunt"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5221456845614081
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3198581755956472
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25075528700906347
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4526979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14835438829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/1bf5eb2a-c0e2-4bfc-9ae1-ec5737974cbe.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/1bf5eb2a-c0e2-4bfc-9ae1-ec5737974cbe.json
deleted file mode 100644
index ea8005b0c5ced8bcd0fc636eb07c9e0ebc51be91..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/1bf5eb2a-c0e2-4bfc-9ae1-ec5737974cbe.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective/1762652580.038195",
- "retrieved_timestamp": "1762652580.038196",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored-Reflective"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5139274901705253
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3013444769655102
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1472809667673716
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44333333333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12890625
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/41186ba2-77da-496c-afd0-c0f11ea05c9b.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/41186ba2-77da-496c-afd0-c0f11ea05c9b.json
deleted file mode 100644
index 1ecb347630a29013eb8886a577dd9221ea7d7e54..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/41186ba2-77da-496c-afd0-c0f11ea05c9b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored/1762652580.037415",
- "retrieved_timestamp": "1762652580.037416",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt-Uncensored"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5421791956453321
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3170339746824052
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16314199395770393
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4486979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14311835106382978
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt/407adfd5-6a1f-420a-a5de-2e37740d7025.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt/407adfd5-6a1f-420a-a5de-2e37740d7025.json
deleted file mode 100644
index f2256bd73bc8601ffa50369688ecc3b27cd591b7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt/407adfd5-6a1f-420a-a5de-2e37740d7025.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Blunt/1762652580.0370848",
- "retrieved_timestamp": "1762652580.037087",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Blunt"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5611632690151022
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32828968244496226
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16389728096676737
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45542708333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14469747340425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Reflective/744cef52-b155-4bb0-9411-2eb47938b5d6.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Reflective/744cef52-b155-4bb0-9411-2eb47938b5d6.json
deleted file mode 100644
index 4466383a22364781a26b5c640fbad5acc36b3dad..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B-Reflective/744cef52-b155-4bb0-9411-2eb47938b5d6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B-Reflective/1762652580.038453",
- "retrieved_timestamp": "1762652580.038454",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "braindao/DeepSeek-R1-Distill-Qwen-14B-Reflective"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4290227706928727
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.301225755504323
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19184290030211482
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4553958333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11294880319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B/f269f0cb-4f9b-4f29-84c2-a4f31ff08290.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B/f269f0cb-4f9b-4f29-84c2-a4f31ff08290.json
deleted file mode 100644
index 843f11416535adfb30bf2e39f3639a91416a0647..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-14B/f269f0cb-4f9b-4f29-84c2-a4f31ff08290.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-14B/1762652580.036597",
- "retrieved_timestamp": "1762652580.036598",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "braindao/DeepSeek-R1-Distill-Qwen-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "braindao/DeepSeek-R1-Distill-Qwen-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4171575863154209
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30329653176003074
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17598187311178248
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4487916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11269946808510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B-Blunt/678a08d8-3089-4d97-879d-c5485344de05.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B-Blunt/678a08d8-3089-4d97-879d-c5485344de05.json
deleted file mode 100644
index b12e2691375ed5d885d36fe761347e93ad526989..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B-Blunt/678a08d8-3089-4d97-879d-c5485344de05.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B-Blunt/1762652580.03893",
- "retrieved_timestamp": "1762652580.038931",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-Blunt"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4266246891581005
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29017781029884354
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21450151057401812
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38851041666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11693816489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/9c8db160-fc92-473f-a766-fb00fc099f6e.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/9c8db160-fc92-473f-a766-fb00fc099f6e.json
deleted file mode 100644
index 34eefcbdfbce3e2f8c65a150cbdf8f9dbc8fc517..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/9c8db160-fc92-473f-a766-fb00fc099f6e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored/1762652580.03921",
- "retrieved_timestamp": "1762652580.039211",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-ORPO-Uncensored"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3654503384353515
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2958444769655102
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17371601208459214
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38460416666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11328125
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B-Reflective/fd05a73b-5b6a-460e-85d5-547710ab6bac.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B-Reflective/fd05a73b-5b6a-460e-85d5-547710ab6bac.json
deleted file mode 100644
index 8a96518e55de4ff3ac74653ac4b2fcedf6be5db0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B-Reflective/fd05a73b-5b6a-460e-85d5-547710ab6bac.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B-Reflective/1762652580.039571",
- "retrieved_timestamp": "1762652580.039572",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "braindao/DeepSeek-R1-Distill-Qwen-7B-Reflective"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3921783091087204
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2906778102988436
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20241691842900303
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25419463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38999999999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1155252659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B/b4c9ec76-b126-4715-b3cf-c0d8a8a61d44.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B/b4c9ec76-b126-4715-b3cf-c0d8a8a61d44.json
deleted file mode 100644
index e117f9ff4c2ea51d761f4de972308e8172bafc42..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_DeepSeek-R1-Distill-Qwen-7B/b4c9ec76-b126-4715-b3cf-c0d8a8a61d44.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/braindao_DeepSeek-R1-Distill-Qwen-7B/1762652580.0386932",
- "retrieved_timestamp": "1762652580.038694",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "braindao/DeepSeek-R1-Distill-Qwen-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "braindao/DeepSeek-R1-Distill-Qwen-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39679938119744496
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2886778102988436
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19184290030211482
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37666666666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1141123670212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_Qwen2.5-14B/7be8016c-2454-4228-b10d-badba12e845b.json b/leaderboard_data/HFOpenLLMv2/alibaba/braindao_Qwen2.5-14B/7be8016c-2454-4228-b10d-badba12e845b.json
deleted file mode 100644
index 48a07267d43e64f1684adebfe1dbf76ecf2d1552..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/braindao_Qwen2.5-14B/7be8016c-2454-4228-b10d-badba12e845b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/braindao_Qwen2.5-14B/1762652580.039853",
- "retrieved_timestamp": "1762652580.039854",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "braindao/Qwen2.5-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "braindao/Qwen2.5-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.540854931581537
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5852660409288039
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29229607250755285
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3733221476510067
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41235416666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48836436170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_CyberCore-Qwen-2.1-7B/131132b7-5b2a-421f-aa02-360ef9b7f206.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_CyberCore-Qwen-2.1-7B/131132b7-5b2a-421f-aa02-360ef9b7f206.json
deleted file mode 100644
index df995b7d0c80d3ba6b455b749ee77294ee57fb48..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_CyberCore-Qwen-2.1-7B/131132b7-5b2a-421f-aa02-360ef9b7f206.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_CyberCore-Qwen-2.1-7B/1762652580.0426219",
- "retrieved_timestamp": "1762652580.042623",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/CyberCore-Qwen-2.1-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/CyberCore-Qwen-2.1-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5765757080103016
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5572089082936126
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35876132930513593
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4144895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4444813829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_DeepQwen-3B-LCoT-SCE/49243e70-a24d-4e0c-b4c6-4275be1db944.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_DeepQwen-3B-LCoT-SCE/49243e70-a24d-4e0c-b4c6-4275be1db944.json
deleted file mode 100644
index 16eea3303e2a18dcb0c0168830434cf44724be40..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_DeepQwen-3B-LCoT-SCE/49243e70-a24d-4e0c-b4c6-4275be1db944.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_DeepQwen-3B-LCoT-SCE/1762652580.042877",
- "retrieved_timestamp": "1762652580.042878",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/DeepQwen-3B-LCoT-SCE",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/DeepQwen-3B-LCoT-SCE"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4489809261647983
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45123121380305237
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24697885196374622
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35139583333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3289561170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.396
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/7e6a55fb-da39-4b16-a59b-70635e636c02.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/7e6a55fb-da39-4b16-a59b-70635e636c02.json
deleted file mode 100644
index f99a6b123dca7fda861da200c31636d4d5e7b4a9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/7e6a55fb-da39-4b16-a59b-70635e636c02.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_DeepSeek-R1-Distill-Qwen-7B-RRP-Ex/1762652580.043099",
- "retrieved_timestamp": "1762652580.043099",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/DeepSeek-R1-Distill-Qwen-7B-RRP-Ex"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39010492160800014
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3494110718041537
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16540785498489427
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3663125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2508311170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_FwF-Qwen-7B-0.1/bfaeefb1-93c9-470b-9376-9c67a1d20862.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_FwF-Qwen-7B-0.1/bfaeefb1-93c9-470b-9376-9c67a1d20862.json
deleted file mode 100644
index 8dd4a817c4683229b58118c5695d2a2b912b886b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_FwF-Qwen-7B-0.1/bfaeefb1-93c9-470b-9376-9c67a1d20862.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_FwF-Qwen-7B-0.1/1762652580.04422",
- "retrieved_timestamp": "1762652580.044221",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/FwF-Qwen-7B-0.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/FwF-Qwen-7B-0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30045390674521383
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5019272523147252
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2764350453172205
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39520833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4060837765957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_FwF-Qwen-7B-0.2/ee7b9254-5e4a-46a0-a8b3-2ecc1708e6ab.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_FwF-Qwen-7B-0.2/ee7b9254-5e4a-46a0-a8b3-2ecc1708e6ab.json
deleted file mode 100644
index 2b01dac53def0e561ca31dda9696290712fade2c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_FwF-Qwen-7B-0.2/ee7b9254-5e4a-46a0-a8b3-2ecc1708e6ab.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_FwF-Qwen-7B-0.2/1762652580.044472",
- "retrieved_timestamp": "1762652580.0444732",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/FwF-Qwen-7B-0.2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/FwF-Qwen-7B-0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44790710869382133
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5596406929346521
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4259818731117825
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42178125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4382480053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Sky-T1/33cc8f90-d019-49d9-8220-d66260659435.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Sky-T1/33cc8f90-d019-49d9-8220-d66260659435.json
deleted file mode 100644
index 5f4e2ab6188d210adca72f2d28c8de416a65b80e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Sky-T1/33cc8f90-d019-49d9-8220-d66260659435.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Sky-T1/1762652580.0542989",
- "retrieved_timestamp": "1762652580.0542998",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen-2.5-7B-Deep-Sky-T1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen-2.5-7B-Deep-Sky-T1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42080457630198986
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4139878251775055
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40181249999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2103557180851064
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Stock-v1/a9fe98a7-e143-4100-99cd-adea90917c4c.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Stock-v1/a9fe98a7-e143-4100-99cd-adea90917c4c.json
deleted file mode 100644
index 9cdf30ea72d68c035abc2db55c27881279084ab9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Stock-v1/a9fe98a7-e143-4100-99cd-adea90917c4c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Stock-v1/1762652580.054558",
- "retrieved_timestamp": "1762652580.054559",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen-2.5-7B-Deep-Stock-v1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5695066867023941
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5361336083539997
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26435045317220546
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4108958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40658244680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Stock-v4/56ae78dc-3cae-43b0-afc9-e6fac3c6556a.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Stock-v4/56ae78dc-3cae-43b0-afc9-e6fac3c6556a.json
deleted file mode 100644
index 6992447860d2571bb69f6bd8ba2d8717cedfbefc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Stock-v4/56ae78dc-3cae-43b0-afc9-e6fac3c6556a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Stock-v4/1762652580.054795",
- "retrieved_timestamp": "1762652580.054796",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen-2.5-7B-Deep-Stock-v4",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7752862405085175
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5452765042799131
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48942598187311176
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41269791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4341755319148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Stock-v5/39ce157b-e374-4963-8b40-6393835574f5.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Stock-v5/39ce157b-e374-4963-8b40-6393835574f5.json
deleted file mode 100644
index 7ad5aeb50ef2a46218e0cd2641bf2e34ed77366a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Deep-Stock-v5/39ce157b-e374-4963-8b40-6393835574f5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Deep-Stock-v5/1762652580.05501",
- "retrieved_timestamp": "1762652580.055011",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen-2.5-7B-Deep-Stock-v5",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen-2.5-7B-Deep-Stock-v5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45090471061228654
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4672461238794705
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1472809667673716
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3648229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28316156914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Exp-Sce/c57286a9-ee0c-48e7-814e-8f2aa8e9688a.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Exp-Sce/c57286a9-ee0c-48e7-814e-8f2aa8e9688a.json
deleted file mode 100644
index f9c862b49f75cf29ab48490a31eb3b26bb9e997e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Exp-Sce/c57286a9-ee0c-48e7-814e-8f2aa8e9688a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Exp-Sce/1762652580.055233",
- "retrieved_timestamp": "1762652580.055233",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen-2.5-7B-Exp-Sce",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen-2.5-7B-Exp-Sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.765169749597734
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5505865059891896
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3255287009063444
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44302083333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42586436170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-R1-Stock/672e66ed-80e2-4b45-b52c-d9265f8efac8.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-R1-Stock/672e66ed-80e2-4b45-b52c-d9265f8efac8.json
deleted file mode 100644
index 19496e84cbba9d2a97588ca21e4be2732d1f35a1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-R1-Stock/672e66ed-80e2-4b45-b52c-d9265f8efac8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-R1-Stock/1762652580.055454",
- "retrieved_timestamp": "1762652580.055455",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen-2.5-7B-R1-Stock",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen-2.5-7B-R1-Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7573261169253137
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5393363105747148
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5007552870090635
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3993645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.429438164893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Stock-Deep-Bespoke/af89079b-b84e-48f1-876a-ebf2d933d91e.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Stock-Deep-Bespoke/af89079b-b84e-48f1-876a-ebf2d933d91e.json
deleted file mode 100644
index d55ec55db8e1b11733786110aaa5fc561af8b469..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7B-Stock-Deep-Bespoke/af89079b-b84e-48f1-876a-ebf2d933d91e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7B-Stock-Deep-Bespoke/1762652580.0556722",
- "retrieved_timestamp": "1762652580.0556731",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen-2.5-7B-Stock-Deep-Bespoke"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5206219497599702
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49203477801491813
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18882175226586104
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4068020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3579621010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7b-S1k/e7394d5d-4253-4a53-8a0a-73b0a41e62a4.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7b-S1k/e7394d5d-4253-4a53-8a0a-73b0a41e62a4.json
deleted file mode 100644
index c319bd14319d74becc9887f4faea8f0c043fc38e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen-2.5-7b-S1k/e7394d5d-4253-4a53-8a0a-73b0a41e62a4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen-2.5-7b-S1k/1762652580.055886",
- "retrieved_timestamp": "1762652580.0558872",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen-2.5-7b-S1k",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen-2.5-7b-S1k"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7162351449708995
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5562750208035135
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4780966767371601
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4071458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4382480053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-1.5B-Model-Stock/865ffa1b-af08-416e-8de0-a16091d4ec79.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-1.5B-Model-Stock/865ffa1b-af08-416e-8de0-a16091d4ec79.json
deleted file mode 100644
index 129f7c18bc153cabd468cad714e88d3a830e6efe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-1.5B-Model-Stock/865ffa1b-af08-416e-8de0-a16091d4ec79.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-1.5B-Model-Stock/1762652580.0561001",
- "retrieved_timestamp": "1762652580.056101",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-1.5B-Model-Stock",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-1.5B-Model-Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18292574812608325
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2873695911207613
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3674270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11003989361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.776
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v2/e949a47b-85f9-4072-8302-8bfef92579d9.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v2/e949a47b-85f9-4072-8302-8bfef92579d9.json
deleted file mode 100644
index eb8637a397ad930f11d201afa1d2db7ecfdf6953..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v2/e949a47b-85f9-4072-8302-8bfef92579d9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v2/1762652580.0565188",
- "retrieved_timestamp": "1762652580.05652",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-3B-Model-Stock-v2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-3B-Model-Stock-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6490157227268093
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46774789186946836
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3867069486404834
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3914583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3269614361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.396
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v3.1/744d1978-7aa3-44b6-91a0-664383a66f8b.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v3.1/744d1978-7aa3-44b6-91a0-664383a66f8b.json
deleted file mode 100644
index 1a83383b0b5b2ecef0c89a45ad31e81df029dc9b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v3.1/744d1978-7aa3-44b6-91a0-664383a66f8b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v3.1/1762652580.056732",
- "retrieved_timestamp": "1762652580.056733",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-3B-Model-Stock-v3.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-3B-Model-Stock-v3.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6480915083090644
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.473722298403459
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38972809667673713
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39679166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3289561170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.396
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v3.2/139f2e38-0b98-4bfe-82b0-99a6e6b51e7f.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v3.2/139f2e38-0b98-4bfe-82b0-99a6e6b51e7f.json
deleted file mode 100644
index 3e81e3a01f029a6135ad2f3ff081df814554b0bc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v3.2/139f2e38-0b98-4bfe-82b0-99a6e6b51e7f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v3.2/1762652580.05695",
- "retrieved_timestamp": "1762652580.05695",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-3B-Model-Stock-v3.2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-3B-Model-Stock-v3.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6353021095138676
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4727417689283166
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37537764350453173
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39279166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3293716755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.396
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v4.1/8348f83b-0739-411f-8b87-bd9d5e871ab3.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v4.1/8348f83b-0739-411f-8b87-bd9d5e871ab3.json
deleted file mode 100644
index e0772b17df5523525f9bb8ec6c3915a98dcdd64e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock-v4.1/8348f83b-0739-411f-8b87-bd9d5e871ab3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock-v4.1/1762652580.0571678",
- "retrieved_timestamp": "1762652580.057169",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-3B-Model-Stock-v4.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-3B-Model-Stock-v4.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6380747527671025
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48202557906199406
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3768882175226586
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39409374999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3386801861702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.396
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock/4dcf1412-4182-40bd-bd1a-2246e29f18e9.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock/4dcf1412-4182-40bd-bd1a-2246e29f18e9.json
deleted file mode 100644
index e18648a7ee05b411e962f40f3fdefa31be4ccc3f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-Model-Stock/4dcf1412-4182-40bd-bd1a-2246e29f18e9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-Model-Stock/1762652580.056308",
- "retrieved_timestamp": "1762652580.056309",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-3B-Model-Stock",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-3B-Model-Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6380747527671025
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4712481909242632
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37990936555891236
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28859060402684567
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39415625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3249667553191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.396
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-RP-Mix/f43b9387-56a9-4c21-850c-5cfda84fc8b5.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-RP-Mix/f43b9387-56a9-4c21-850c-5cfda84fc8b5.json
deleted file mode 100644
index 04cc00cc83a37c08c1a6fe2d4b9d2d3f1f58e6b8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-RP-Mix/f43b9387-56a9-4c21-850c-5cfda84fc8b5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-RP-Mix/1762652580.057388",
- "retrieved_timestamp": "1762652580.057389",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-3B-RP-Mix",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-3B-RP-Mix"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5720543712903984
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4894378989397821
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21525679758308158
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42844791666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37275598404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-RP-Thinker-V2/497c8c15-1b77-4468-b33d-efa190c28e78.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-RP-Thinker-V2/497c8c15-1b77-4468-b33d-efa190c28e78.json
deleted file mode 100644
index 6292da827616a3fce9407e2756848aa88db02294..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-RP-Thinker-V2/497c8c15-1b77-4468-b33d-efa190c28e78.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-RP-Thinker-V2/1762652580.057826",
- "retrieved_timestamp": "1762652580.057826",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-3B-RP-Thinker-V2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-3B-RP-Thinker-V2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6419965691033125
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46784408133522204
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38293051359516617
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.398125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3271276595744681
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-RP-Thinker/80cadd5b-ebbd-4f2f-912b-5d944650e2b1.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-RP-Thinker/80cadd5b-ebbd-4f2f-912b-5d944650e2b1.json
deleted file mode 100644
index 282778bc6c093d7bc178b412504316ad6d5063e2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-3B-RP-Thinker/80cadd5b-ebbd-4f2f-912b-5d944650e2b1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-3B-RP-Thinker/1762652580.0576031",
- "retrieved_timestamp": "1762652580.057604",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-3B-RP-Thinker",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-3B-RP-Thinker"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.589414974489909
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4164134011392067
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33534743202416917
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3287291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3149933510638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-CyberRombos/1dc11c68-ce65-4a5b-9f75-4cdf1775bfc6.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-CyberRombos/1dc11c68-ce65-4a5b-9f75-4cdf1775bfc6.json
deleted file mode 100644
index 7a74aea2361963adc22a4e81d6eb684f6f8351ae..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-CyberRombos/1dc11c68-ce65-4a5b-9f75-4cdf1775bfc6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-CyberRombos/1762652580.058041",
- "retrieved_timestamp": "1762652580.058042",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-7B-CyberRombos",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-7B-CyberRombos"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.751830698103255
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5464960546716063
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4962235649546828
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41254166666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4390791223404255
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-Fuse-Exp/f435a5b0-cc12-4603-b7b0-4625dc547ed2.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-Fuse-Exp/f435a5b0-cc12-4603-b7b0-4625dc547ed2.json
deleted file mode 100644
index ea0480ad6d619906b4f56e4c61796d8c754772c4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-Fuse-Exp/f435a5b0-cc12-4603-b7b0-4625dc547ed2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-Fuse-Exp/1762652580.0583198",
- "retrieved_timestamp": "1762652580.058321",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-7B-Fuse-Exp",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-7B-Fuse-Exp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5468501354184675
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5108680600425207
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31419939577039274
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45728125000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3308676861702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-MixStock-Sce-V0.3/daf38e27-1149-44a8-84f2-93f842f4740a.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-MixStock-Sce-V0.3/daf38e27-1149-44a8-84f2-93f842f4740a.json
deleted file mode 100644
index ecd979093487bc003df30e813e427bd0216b1eaa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-MixStock-Sce-V0.3/daf38e27-1149-44a8-84f2-93f842f4740a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-MixStock-Sce-V0.3/1762652580.058998",
- "retrieved_timestamp": "1762652580.058999",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-7B-MixStock-Sce-V0.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21197644472222593
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3479005166788895
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25755287009063443
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3713958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17794215425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-MixStock-V0.1/4a5bb50c-017d-421d-8ea1-21a8316db0f4.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-MixStock-V0.1/4a5bb50c-017d-421d-8ea1-21a8316db0f4.json
deleted file mode 100644
index be0fc7e4eccb0970dd63a9cbcae1c17ad01a61e2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-MixStock-V0.1/4a5bb50c-017d-421d-8ea1-21a8316db0f4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-MixStock-V0.1/1762652580.059214",
- "retrieved_timestamp": "1762652580.059214",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-7B-MixStock-V0.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-7B-MixStock-V0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7673428724672757
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5479100568012056
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31722054380664655
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.441625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4256150265957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-R1-Bespoke-Stock/20de3a0f-fad0-4832-863e-2b2049037c4f.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-R1-Bespoke-Stock/20de3a0f-fad0-4832-863e-2b2049037c4f.json
deleted file mode 100644
index a5318692a54a166d3e148d45875c7d6796734c83..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-R1-Bespoke-Stock/20de3a0f-fad0-4832-863e-2b2049037c4f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-R1-Bespoke-Stock/1762652580.059437",
- "retrieved_timestamp": "1762652580.059438",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-7B-R1-Bespoke-Stock",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-7B-R1-Bespoke-Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3726445830396681
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48221362910675625
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20468277945619334
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3926354166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34715757978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-R1-Bespoke-Task/0f460b31-7249-4e2d-a614-d1230e95f3cf.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-R1-Bespoke-Task/0f460b31-7249-4e2d-a614-d1230e95f3cf.json
deleted file mode 100644
index 9f10c73ee4f071ec1c6f69ee3e4295d908ec9767..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-R1-Bespoke-Task/0f460b31-7249-4e2d-a614-d1230e95f3cf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-R1-Bespoke-Task/1762652580.059654",
- "retrieved_timestamp": "1762652580.059655",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-7B-R1-Bespoke-Task",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-7B-R1-Bespoke-Task"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3786641666334215
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41495531490332715
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1782477341389728
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3568854166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2687832446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-RRP-1M-Thinker/1879a765-f4ab-4bad-9525-47f428b43220.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-RRP-1M-Thinker/1879a765-f4ab-4bad-9525-47f428b43220.json
deleted file mode 100644
index e142670a403e6eb0b0775792dfbce152efd1eb67..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-RRP-1M-Thinker/1879a765-f4ab-4bad-9525-47f428b43220.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-RRP-1M-Thinker/1762652580.060085",
- "retrieved_timestamp": "1762652580.060086",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-7B-RRP-1M-Thinker",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-7B-RRP-1M-Thinker"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23081091503876383
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3481907488085136
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2719033232628399
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3767291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1768617021276596
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-RRP-1M/9ec2ac0c-21e8-4c9c-ba5f-69ad284400bb.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-RRP-1M/9ec2ac0c-21e8-4c9c-ba5f-69ad284400bb.json
deleted file mode 100644
index 963d4487554c1a4d4a32885cf484a82a98ca19c8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-RRP-1M/9ec2ac0c-21e8-4c9c-ba5f-69ad284400bb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-RRP-1M/1762652580.059867",
- "retrieved_timestamp": "1762652580.0598679",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-7B-RRP-1M",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-7B-RRP-1M"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7481338404322753
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.545239229980545
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.324773413897281
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44826041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4266123670212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-RRP-ID/85b10038-d136-4be7-8e04-7298ddb4f7d2.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-RRP-ID/85b10038-d136-4be7-8e04-7298ddb4f7d2.json
deleted file mode 100644
index 79f113ef74eb61f9b3d8ff92e18fa72c63bc3acf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-RRP-ID/85b10038-d136-4be7-8e04-7298ddb4f7d2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-RRP-ID/1762652580.0603101",
- "retrieved_timestamp": "1762652580.0603101",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-7B-RRP-ID",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-7B-RRP-ID"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.747259493698941
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5479543512061099
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.486404833836858
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41796875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4387466755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-Sky-R1-Mini/c1f39d51-d7a2-4fee-ba35-ef4e0d429b29.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-Sky-R1-Mini/c1f39d51-d7a2-4fee-ba35-ef4e0d429b29.json
deleted file mode 100644
index de1beac6b8a631af7bd1ab913fa159e11878f341..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_Qwen2.5-7B-Sky-R1-Mini/c1f39d51-d7a2-4fee-ba35-ef4e0d429b29.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-Sky-R1-Mini/1762652580.061045",
- "retrieved_timestamp": "1762652580.0610461",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-7B-Sky-R1-Mini",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-7B-Sky-R1-Mini"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23048622100471194
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3502939195575525
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02945619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3448229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12533244680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_QwenMosaic-7B/4fcee29d-6351-4875-995d-81834fd878c3.json b/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_QwenMosaic-7B/4fcee29d-6351-4875-995d-81834fd878c3.json
deleted file mode 100644
index 22a4354edf52dafa4e012ca3523652c058790605..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/bunnycore_QwenMosaic-7B/4fcee29d-6351-4875-995d-81834fd878c3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_QwenMosaic-7B/1762652580.061329",
- "retrieved_timestamp": "1762652580.0613298",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/QwenMosaic-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "bunnycore/QwenMosaic-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5819215237791282
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5564132127895585
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44410876132930516
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4163854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43101728723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/cognitivecomputations_Dolphin3.0-Qwen2.5-0.5B/4b0c69d9-1801-4a54-9554-d8dcff88f9a3.json b/leaderboard_data/HFOpenLLMv2/alibaba/cognitivecomputations_Dolphin3.0-Qwen2.5-0.5B/4b0c69d9-1801-4a54-9554-d8dcff88f9a3.json
deleted file mode 100644
index fe5a50f4c643570d8e6b9baed399c9f16fe54a25..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/cognitivecomputations_Dolphin3.0-Qwen2.5-0.5B/4b0c69d9-1801-4a54-9554-d8dcff88f9a3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/cognitivecomputations_Dolphin3.0-Qwen2.5-0.5B/1762652580.112457",
- "retrieved_timestamp": "1762652580.112458",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "cognitivecomputations/Dolphin3.0-Qwen2.5-0.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4697136930012367
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31142229157184026
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2348993288590604
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35545833333333327
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14128989361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/cognitivecomputations_dolphin-2.9.2-qwen2-72b/5d3c9637-0558-4a2e-9950-8e7017d013f8.json b/leaderboard_data/HFOpenLLMv2/alibaba/cognitivecomputations_dolphin-2.9.2-qwen2-72b/5d3c9637-0558-4a2e-9950-8e7017d013f8.json
deleted file mode 100644
index 9f7d4d86618dc5f62b2d6022d377233c36ae7264..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/cognitivecomputations_dolphin-2.9.2-qwen2-72b/5d3c9637-0558-4a2e-9950-8e7017d013f8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-qwen2-72b/1762652580.114711",
- "retrieved_timestamp": "1762652580.114712",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "cognitivecomputations/dolphin-2.9.2-qwen2-72b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "cognitivecomputations/dolphin-2.9.2-qwen2-72b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6343778950961227
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6296364939584073
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802114803625378
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3699664429530201
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45207291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.547124335106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/cognitivecomputations_dolphin-2.9.2-qwen2-7b/c04e8c21-3ae1-457a-9609-682341323a88.json b/leaderboard_data/HFOpenLLMv2/alibaba/cognitivecomputations_dolphin-2.9.2-qwen2-7b/c04e8c21-3ae1-457a-9609-682341323a88.json
deleted file mode 100644
index 5e6e0f4ff3657ab9c6d9e20aeb2614aae2393aec..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/cognitivecomputations_dolphin-2.9.2-qwen2-7b/c04e8c21-3ae1-457a-9609-682341323a88.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.2-qwen2-7b/1762652580.114933",
- "retrieved_timestamp": "1762652580.114934",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "cognitivecomputations/dolphin-2.9.2-qwen2-7b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "cognitivecomputations/dolphin-2.9.2-qwen2-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3534599307614906
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48938263759195594
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13444108761329304
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41914583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4050864361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B/d38f0e3a-e89e-4af6-95b2-8230b6a84ec3.json b/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B/d38f0e3a-e89e-4af6-95b2-8230b6a84ec3.json
deleted file mode 100644
index 93d29e7a1833b523445df042cf28e00cc718cc8e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B/d38f0e3a-e89e-4af6-95b2-8230b6a84ec3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-1.5B/1762652580.121964",
- "retrieved_timestamp": "1762652580.1219652",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34634104176917246
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32409879947333436
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1691842900302115
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2558724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36345833333333327
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11868351063829788
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-14B/77e70ef3-fef2-4b75-9221-b165ec29f31e.json b/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-14B/77e70ef3-fef2-4b75-9221-b165ec29f31e.json
deleted file mode 100644
index e2cbf872e9c6af5480423dacd091abf9187da799..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-14B/77e70ef3-fef2-4b75-9221-b165ec29f31e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-14B/1762652580.122241",
- "retrieved_timestamp": "1762652580.122248",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43816517950150047
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5905573130283358
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5702416918429003
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3875838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.536625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4666722074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-32B/6731c6b8-0b23-4fc2-b284-01025ce30887.json b/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-32B/6731c6b8-0b23-4fc2-b284-01025ce30887.json
deleted file mode 100644
index 637e2740b5f2b97a72e2848d0dfe8ac8cda29932..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-32B/6731c6b8-0b23-4fc2-b284-01025ce30887.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-32B/1762652580.12255",
- "retrieved_timestamp": "1762652580.1225522",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4186314534324481
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41969150892898055
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17069486404833836
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4526041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46866688829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-7B/4cb8eae2-bc55-4adb-a4eb-1fc9eb29d891.json b/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-7B/4cb8eae2-bc55-4adb-a4eb-1fc9eb29d891.json
deleted file mode 100644
index f00702a37c1674bcb649807ab720fd4b6967e510..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/deepseek-ai_DeepSeek-R1-Distill-Qwen-7B/4cb8eae2-bc55-4adb-a4eb-1fc9eb29d891.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/deepseek-ai_DeepSeek-R1-Distill-Qwen-7B/1762652580.1228092",
- "retrieved_timestamp": "1762652580.1228101",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40376866713653103
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34425676981862185
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19561933534743203
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36628124999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2321309840425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/dfurman_Qwen2-72B-Orpo-v0.1/b197728d-b390-45a8-8adc-ed8567b628da.json b/leaderboard_data/HFOpenLLMv2/alibaba/dfurman_Qwen2-72B-Orpo-v0.1/b197728d-b390-45a8-8adc-ed8567b628da.json
deleted file mode 100644
index b70ac13d10d3470ece4d247b1e980ad7c5279c51..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/dfurman_Qwen2-72B-Orpo-v0.1/b197728d-b390-45a8-8adc-ed8567b628da.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dfurman_Qwen2-72B-Orpo-v0.1/1762652580.125584",
- "retrieved_timestamp": "1762652580.1255848",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dfurman/Qwen2-72B-Orpo-v0.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "dfurman/Qwen2-72B-Orpo-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7879759039348928
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6969024790545039
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40558912386706947
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38422818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47842708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5454621010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.699
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_QwenQwen2.5-7B-IT-Dare/09deb823-536f-4afc-95bf-ebb0a8eb2e00.json b/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_QwenQwen2.5-7B-IT-Dare/09deb823-536f-4afc-95bf-ebb0a8eb2e00.json
deleted file mode 100644
index bdfaf1c971d40d3656cae492ecb8525395cc67e3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_QwenQwen2.5-7B-IT-Dare/09deb823-536f-4afc-95bf-ebb0a8eb2e00.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_QwenQwen2.5-7B-IT-Dare/1762652580.1400871",
- "retrieved_timestamp": "1762652580.140088",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/QwenQwen2.5-7B-IT-Dare",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "ehristoforu/QwenQwen2.5-7B-IT-Dare"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7509064836855099
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5397962708415814
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5090634441087614
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4033645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4289394946808511
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_QwenQwen2.5-7B-IT/30f8faa5-777f-47bc-b128-f31b950079a3.json b/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_QwenQwen2.5-7B-IT/30f8faa5-777f-47bc-b128-f31b950079a3.json
deleted file mode 100644
index 4cfdd36270c2ea3ad86ab86c75e59714d0438ef8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_QwenQwen2.5-7B-IT/30f8faa5-777f-47bc-b128-f31b950079a3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_QwenQwen2.5-7B-IT/1762652580.1398232",
- "retrieved_timestamp": "1762652580.1398232",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/QwenQwen2.5-7B-IT",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "ehristoforu/QwenQwen2.5-7B-IT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.751830698103255
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5397962708415814
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5090634441087614
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4033645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4289394946808511
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_RQwen-v0.1/93187c79-f1a4-45f9-9d95-a254a185f7a4.json b/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_RQwen-v0.1/93187c79-f1a4-45f9-9d95-a254a185f7a4.json
deleted file mode 100644
index 625dbebd0b7fe8837652d2f6739dad1f28787379..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_RQwen-v0.1/93187c79-f1a4-45f9-9d95-a254a185f7a4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_RQwen-v0.1/1762652580.140311",
- "retrieved_timestamp": "1762652580.140312",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/RQwen-v0.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "ehristoforu/RQwen-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7624968417133207
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6446435015804635
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4645015105740181
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32550335570469796
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41390625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5201961436170213
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_RQwen-v0.2/69318100-73ee-47f4-96b2-6e7b310fbcd1.json b/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_RQwen-v0.2/69318100-73ee-47f4-96b2-6e7b310fbcd1.json
deleted file mode 100644
index 2670e8a91b5043629ca61561934e9aaa6a29ee9f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_RQwen-v0.2/69318100-73ee-47f4-96b2-6e7b310fbcd1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_RQwen-v0.2/1762652580.140525",
- "retrieved_timestamp": "1762652580.140526",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/RQwen-v0.2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "ehristoforu/RQwen-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7503568309862276
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6426888858891955
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3270392749244713
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.337248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4206666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.515874335106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_coolqwen-3b-it/5aab957b-f25b-4208-9bf8-2d16887245bc.json b/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_coolqwen-3b-it/5aab957b-f25b-4208-9bf8-2d16887245bc.json
deleted file mode 100644
index f94dc3c97caba3b9479d65fa4f8cbcb805fd7567..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_coolqwen-3b-it/5aab957b-f25b-4208-9bf8-2d16887245bc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_coolqwen-3b-it/1762652580.140961",
- "retrieved_timestamp": "1762652580.1409621",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/coolqwen-3b-it",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "ehristoforu/coolqwen-3b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6472670292601409
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.485089343991756
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36706948640483383
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41251041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3601230053191489
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.085
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_frqwen2.5-from7b-duable4layers-it/b2c0f0f2-3c1d-4b2a-a82d-24001cbfd3d7.json b/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_frqwen2.5-from7b-duable4layers-it/b2c0f0f2-3c1d-4b2a-a82d-24001cbfd3d7.json
deleted file mode 100644
index 65550aa5fb3aca921ec511324e1d976981097637..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_frqwen2.5-from7b-duable4layers-it/b2c0f0f2-3c1d-4b2a-a82d-24001cbfd3d7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_frqwen2.5-from7b-duable4layers-it/1762652580.1428769",
- "retrieved_timestamp": "1762652580.1428769",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/frqwen2.5-from7b-duable4layers-it",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "ehristoforu/frqwen2.5-from7b-duable4layers-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7728881589737453
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5263561044354216
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4509063444108761
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4165729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4126496010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 8.545
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_frqwen2.5-from7b-it/26034d5d-5d52-40d8-aa9b-e90dbd255903.json b/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_frqwen2.5-from7b-it/26034d5d-5d52-40d8-aa9b-e90dbd255903.json
deleted file mode 100644
index cd5eca5a41f2ecbb8e19c2bcf30afd2659eb2f75..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_frqwen2.5-from7b-it/26034d5d-5d52-40d8-aa9b-e90dbd255903.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_frqwen2.5-from7b-it/1762652580.143308",
- "retrieved_timestamp": "1762652580.143309",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/frqwen2.5-from7b-it",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "ehristoforu/frqwen2.5-from7b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6532123654126606
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5142906815349029
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29229607250755285
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4085729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3976894946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 13.206
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_qwen2.5-test-32b-it/606d699f-c7ac-4e5b-b5a3-5bd43f0a3ff6.json b/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_qwen2.5-test-32b-it/606d699f-c7ac-4e5b-b5a3-5bd43f0a3ff6.json
deleted file mode 100644
index a8925d02eacf9c35943f9b9cd34bf9c2756d5e50..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_qwen2.5-test-32b-it/606d699f-c7ac-4e5b-b5a3-5bd43f0a3ff6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_qwen2.5-test-32b-it/1762652580.144918",
- "retrieved_timestamp": "1762652580.1449192",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/qwen2.5-test-32b-it",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "ehristoforu/qwen2.5-test-32b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7889499860370484
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.708059329453303
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5974320241691843
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3640939597315436
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4578125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5765458776595744
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_qwen2.5-with-lora-think-3b-it/6c40f966-753b-4301-8c9b-f7b4905c0b68.json b/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_qwen2.5-with-lora-think-3b-it/6c40f966-753b-4301-8c9b-f7b4905c0b68.json
deleted file mode 100644
index 9698b20c31b14503f491361491149db9a1287b70..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/ehristoforu_qwen2.5-with-lora-think-3b-it/6c40f966-753b-4301-8c9b-f7b4905c0b68.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_qwen2.5-with-lora-think-3b-it/1762652580.1451252",
- "retrieved_timestamp": "1762652580.1451259",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/qwen2.5-with-lora-think-3b-it",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "ehristoforu/qwen2.5-with-lora-think-3b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5319374814381397
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4686847308109022
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.236404833836858
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43095833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3402593085106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/freewheelin_free-evo-qwen72b-v0.8-re/cfb071af-7283-4155-8ce1-40f751dd46ec.json b/leaderboard_data/HFOpenLLMv2/alibaba/freewheelin_free-evo-qwen72b-v0.8-re/cfb071af-7283-4155-8ce1-40f751dd46ec.json
deleted file mode 100644
index 8067752eb832631b2f66ba47ccb828667c001c5e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/freewheelin_free-evo-qwen72b-v0.8-re/cfb071af-7283-4155-8ce1-40f751dd46ec.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/freewheelin_free-evo-qwen72b-v0.8-re/1762652580.161332",
- "retrieved_timestamp": "1762652580.161333",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "freewheelin/free-evo-qwen72b-v0.8-re",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "freewheelin/free-evo-qwen72b-v0.8-re"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.533086654521115
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6127477065378042
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18051359516616314
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3565436241610738
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4871666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4870345744680851
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 72.288
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_2500_qwen/84ad6756-cb9d-4303-8e7a-395c1dc7c222.json b/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_2500_qwen/84ad6756-cb9d-4303-8e7a-395c1dc7c222.json
deleted file mode 100644
index a49c77f8136a8bb6ad3cc61a47420ac274ea775e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_2500_qwen/84ad6756-cb9d-4303-8e7a-395c1dc7c222.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_2500_qwen/1762652580.170526",
- "retrieved_timestamp": "1762652580.170526",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/ifd_2500_qwen",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "godlikehhd/ifd_2500_qwen"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33647388928044253
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42983047351897224
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09818731117824774
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36146875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2921376329787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_new_correct_all_sample_2500_qwen/b481d1bd-e678-4b78-aecb-d43a561dd969.json b/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_new_correct_all_sample_2500_qwen/b481d1bd-e678-4b78-aecb-d43a561dd969.json
deleted file mode 100644
index 24a6d9d8a858e4815fe4863a0e47b3377faf028a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_new_correct_all_sample_2500_qwen/b481d1bd-e678-4b78-aecb-d43a561dd969.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_new_correct_all_sample_2500_qwen/1762652580.170775",
- "retrieved_timestamp": "1762652580.1707761",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/ifd_new_correct_all_sample_2500_qwen",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "godlikehhd/ifd_new_correct_all_sample_2500_qwen"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33757319467900726
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4019641175400575
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09592145015105741
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3561666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2888962765957447
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_new_correct_sample_2500_qwen/c42196be-c20b-413d-8870-f10759058098.json b/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_new_correct_sample_2500_qwen/c42196be-c20b-413d-8870-f10759058098.json
deleted file mode 100644
index 7c36ac9107d51690a2f9ed99b0370cd17fc69986..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_new_correct_sample_2500_qwen/c42196be-c20b-413d-8870-f10759058098.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_new_correct_sample_2500_qwen/1762652580.170979",
- "retrieved_timestamp": "1762652580.1709802",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/ifd_new_correct_sample_2500_qwen",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "godlikehhd/ifd_new_correct_sample_2500_qwen"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33974631754854895
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41103125849665423
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3078859060402685
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3626770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.293218085106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_new_qwen_2500/8d8663a1-12f6-4e88-af3d-784ff86e8c59.json b/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_new_qwen_2500/8d8663a1-12f6-4e88-af3d-784ff86e8c59.json
deleted file mode 100644
index 48951a77cf0eb5eab6fd36df79ab6815222b1f72..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_ifd_new_qwen_2500/8d8663a1-12f6-4e88-af3d-784ff86e8c59.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_ifd_new_qwen_2500/1762652580.171179",
- "retrieved_timestamp": "1762652580.17118",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/ifd_new_qwen_2500",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "godlikehhd/ifd_new_qwen_2500"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.323959316834887
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41598162527775745
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11178247734138973
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3589583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29105718085106386
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen-2.5-1.5b-cherry/a0621e6d-4178-49c9-aa2b-f56930884b82.json b/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen-2.5-1.5b-cherry/a0621e6d-4178-49c9-aa2b-f56930884b82.json
deleted file mode 100644
index ebfaec91a1369e9b9ff7e245767eeb79b224067f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen-2.5-1.5b-cherry/a0621e6d-4178-49c9-aa2b-f56930884b82.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_qwen-2.5-1.5b-cherry/1762652580.1715672",
- "retrieved_timestamp": "1762652580.1715689",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/qwen-2.5-1.5b-cherry",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "godlikehhd/qwen-2.5-1.5b-cherry"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28933784580468713
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40357573315752204
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10196374622356495
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.345625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29230385638297873
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.772
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen_2.5-1.5b-cherry_new/dd0260dd-59f7-4b3d-8f9c-60b297c07a1b.json b/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen_2.5-1.5b-cherry_new/dd0260dd-59f7-4b3d-8f9c-60b297c07a1b.json
deleted file mode 100644
index fabac2360fa6eb3fdc9d6c5699fd1de19c89db65..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen_2.5-1.5b-cherry_new/dd0260dd-59f7-4b3d-8f9c-60b297c07a1b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_qwen_2.5-1.5b-cherry_new/1762652580.171904",
- "retrieved_timestamp": "1762652580.171905",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/qwen_2.5-1.5b-cherry_new",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "godlikehhd/qwen_2.5-1.5b-cherry_new"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3120442647730245
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4149628386006759
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09667673716012085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34959375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28939494680851063
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen_full_data_alpaca/746630a6-de1d-4976-9168-d8ff06980904.json b/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen_full_data_alpaca/746630a6-de1d-4976-9168-d8ff06980904.json
deleted file mode 100644
index 51da10fab34343229069852b18bb03c4b97707d1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen_full_data_alpaca/746630a6-de1d-4976-9168-d8ff06980904.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_qwen_full_data_alpaca/1762652580.1721501",
- "retrieved_timestamp": "1762652580.172151",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/qwen_full_data_alpaca",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "godlikehhd/qwen_full_data_alpaca"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3136178672588731
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4229212208733662
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09214501510574018
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40515625000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28507313829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen_ins_ans_2500/7f577380-2691-4906-af13-8ca3011e6316.json b/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen_ins_ans_2500/7f577380-2691-4906-af13-8ca3011e6316.json
deleted file mode 100644
index f1e005c53b94115aa00dd1ed66d4e3b342e5adc4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/godlikehhd_qwen_ins_ans_2500/7f577380-2691-4906-af13-8ca3011e6316.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_qwen_ins_ans_2500/1762652580.172384",
- "retrieved_timestamp": "1762652580.172385",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/qwen_ins_ans_2500",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "godlikehhd/qwen_ins_ans_2500"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2698041197356348
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4073950292977672
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11404833836858005
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3588645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28091755319148937
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.1/9b6c775b-ef08-4e57-8441-52d7887615b1.json b/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.1/9b6c775b-ef08-4e57-8441-52d7887615b1.json
deleted file mode 100644
index 7eb8c63e7ba64d88a3227618e5ffffaa59cccecf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.1/9b6c775b-ef08-4e57-8441-52d7887615b1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.1/1762652580.187419",
- "retrieved_timestamp": "1762652580.18742",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "gz987/qwen2.5-7b-cabs-v0.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "gz987/qwen2.5-7b-cabs-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7505817896514582
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5481580818735207
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.479607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.437625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4405751329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.2/7288fa97-efd7-45d5-8769-e0071e9b5488.json b/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.2/7288fa97-efd7-45d5-8769-e0071e9b5488.json
deleted file mode 100644
index 14c53ba6c6a626b75db28b795274ed10453fb397..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.2/7288fa97-efd7-45d5-8769-e0071e9b5488.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.2/1762652580.18783",
- "retrieved_timestamp": "1762652580.187832",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "gz987/qwen2.5-7b-cabs-v0.2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "gz987/qwen2.5-7b-cabs-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7417640748768822
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5516262466675281
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4901812688821752
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44286458333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43974401595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.3/b664e033-1424-431e-af8d-09a11b449286.json b/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.3/b664e033-1424-431e-af8d-09a11b449286.json
deleted file mode 100644
index 0f7cec49a57a093b1665645a55e32386e7a2ae7d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.3/b664e033-1424-431e-af8d-09a11b449286.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.3/1762652580.188173",
- "retrieved_timestamp": "1762652580.188174",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "gz987/qwen2.5-7b-cabs-v0.3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "gz987/qwen2.5-7b-cabs-v0.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7569515552068511
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5494465314719504
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.493202416918429
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44295833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4401595744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.4/8fb7a2aa-3f43-4aaf-b2c0-1770704fcf81.json b/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.4/8fb7a2aa-3f43-4aaf-b2c0-1770704fcf81.json
deleted file mode 100644
index a2cc4c57270b3a31ef02c545a9d863239fe7c750..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/gz987_qwen2.5-7b-cabs-v0.4/8fb7a2aa-3f43-4aaf-b2c0-1770704fcf81.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/gz987_qwen2.5-7b-cabs-v0.4/1762652580.188425",
- "retrieved_timestamp": "1762652580.188426",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "gz987/qwen2.5-7b-cabs-v0.4",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "gz987/qwen2.5-7b-cabs-v0.4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7582503313430586
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5524401094760039
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48489425981873113
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44295833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4395777925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_Deepseek-qwen-modelstock-2B/15a4291f-4918-43a6-b242-90db88fe4a3d.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_Deepseek-qwen-modelstock-2B/15a4291f-4918-43a6-b242-90db88fe4a3d.json
deleted file mode 100644
index df645e8155e23b3c53627cce422d93e986276900..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_Deepseek-qwen-modelstock-2B/15a4291f-4918-43a6-b242-90db88fe4a3d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_Deepseek-qwen-modelstock-2B/1762652580.1914759",
- "retrieved_timestamp": "1762652580.191477",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/Deepseek-qwen-modelstock-2B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "hotmailuser/Deepseek-qwen-modelstock-2B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21487431127186973
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3549242330959277
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33987915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34745833333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19107380319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_Qwen2.5-HomerSlerp-7B/9c7dab43-b26d-4cb4-a73c-95bb1e01ffe8.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_Qwen2.5-HomerSlerp-7B/9c7dab43-b26d-4cb4-a73c-95bb1e01ffe8.json
deleted file mode 100644
index 8eea96b895e8dbc6e0dd0cab533fa74a01096398..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_Qwen2.5-HomerSlerp-7B/9c7dab43-b26d-4cb4-a73c-95bb1e01ffe8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_Qwen2.5-HomerSlerp-7B/1762652580.1961112",
- "retrieved_timestamp": "1762652580.1961112",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/Qwen2.5-HomerSlerp-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "hotmailuser/Qwen2.5-HomerSlerp-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44878145542715553
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5632506117591088
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33157099697885195
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4383333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4548703457446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenModelStock-1.8B/661b1590-f312-447b-a494-1d37ffd93cae.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenModelStock-1.8B/661b1590-f312-447b-a494-1d37ffd93cae.json
deleted file mode 100644
index 870b138eeae863e485170b1eaf789462aaded947..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenModelStock-1.8B/661b1590-f312-447b-a494-1d37ffd93cae.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_QwenModelStock-1.8B/1762652580.196316",
- "retrieved_timestamp": "1762652580.196316",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/QwenModelStock-1.8B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "hotmailuser/QwenModelStock-1.8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3263075306852484
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41881762650909504
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09894259818731117
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4359166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2958776595744681
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp-14B/83387977-a8cd-4cdd-abc7-301006380458.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp-14B/83387977-a8cd-4cdd-abc7-301006380458.json
deleted file mode 100644
index 28b5379c58f1d60ea2ac3c797ceab6bd85dab5f6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp-14B/83387977-a8cd-4cdd-abc7-301006380458.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp-14B/1762652580.1965241",
- "retrieved_timestamp": "1762652580.196525",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/QwenSlerp-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "hotmailuser/QwenSlerp-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7024716640735471
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6491286917834284
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38368580060422963
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3875838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4634479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5399767287234043
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp-3B/7f53fb66-2c19-434a-acec-7cdcf9fce04d.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp-3B/7f53fb66-2c19-434a-acec-7cdcf9fce04d.json
deleted file mode 100644
index 091bbb3373fbc99fd6d09aad35a30a7ddb5204a1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp-3B/7f53fb66-2c19-434a-acec-7cdcf9fce04d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp-3B/1762652580.1967301",
- "retrieved_timestamp": "1762652580.1967309",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/QwenSlerp-3B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "hotmailuser/QwenSlerp-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4333690164319561
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4892345530653528
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27492447129909364
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43166666666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3693484042553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp-7B/4f8db3ee-409a-4bac-ab0a-ee3493d1e842.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp-7B/4f8db3ee-409a-4bac-ab0a-ee3493d1e842.json
deleted file mode 100644
index 9cdf5880fe0b91072c0ad749f9c8f47ada26e797..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp-7B/4f8db3ee-409a-4bac-ab0a-ee3493d1e842.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp-7B/1762652580.197109",
- "retrieved_timestamp": "1762652580.19711",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/QwenSlerp-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "hotmailuser/QwenSlerp-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4672912317096415
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5636352508232924
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34441087613293053
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179530201342282
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4409375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45088098404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp2-14B/6732a278-0613-40fd-bdbc-88a586631279.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp2-14B/6732a278-0613-40fd-bdbc-88a586631279.json
deleted file mode 100644
index c3cae08f9e6b9f9c677bc8f9896151fdd1e0e9b0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp2-14B/6732a278-0613-40fd-bdbc-88a586631279.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp2-14B/1762652580.197355",
- "retrieved_timestamp": "1762652580.197356",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/QwenSlerp2-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "hotmailuser/QwenSlerp2-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7036707048409332
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6492799322983842
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39652567975830816
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48065625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5378989361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp2-3B/cc53c4f9-3c1b-4b21-9aac-ea22dced76c3.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp2-3B/cc53c4f9-3c1b-4b21-9aac-ea22dced76c3.json
deleted file mode 100644
index c9091ed860e447461d502a639d2a8fdac701aa42..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp2-3B/cc53c4f9-3c1b-4b21-9aac-ea22dced76c3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp2-3B/1762652580.197566",
- "retrieved_timestamp": "1762652580.197566",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/QwenSlerp2-3B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "hotmailuser/QwenSlerp2-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4280486885907171
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4801760257099328
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26057401812688824
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4251875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3741688829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp3-14B/7d2c1ffb-d1e7-4c88-af08-74642ddd8741.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp3-14B/7d2c1ffb-d1e7-4c88-af08-74642ddd8741.json
deleted file mode 100644
index 909fe07ae69a651579550ca6223e35634bbdad29..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSlerp3-14B/7d2c1ffb-d1e7-4c88-af08-74642ddd8741.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSlerp3-14B/1762652580.197938",
- "retrieved_timestamp": "1762652580.1979399",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/QwenSlerp3-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "hotmailuser/QwenSlerp3-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6632291209546226
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6266526215170748
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43051359516616317
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36661073825503354
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48078125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5262632978723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSparse-7B/96bbc2c8-bb74-408d-8625-e6bf66b63cd0.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSparse-7B/96bbc2c8-bb74-408d-8625-e6bf66b63cd0.json
deleted file mode 100644
index f6b52f75593fc8edebc2f6e439a9bc128aadb0b6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenSparse-7B/96bbc2c8-bb74-408d-8625-e6bf66b63cd0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_QwenSparse-7B/1762652580.198252",
- "retrieved_timestamp": "1762652580.198254",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/QwenSparse-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "hotmailuser/QwenSparse-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10858632871891026
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28956619468137906
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.010574018126888218
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35622916666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11220079787234043
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenStock-0.5B/72853b4d-cc12-478f-b6f4-977b8fbabfa0.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenStock-0.5B/72853b4d-cc12-478f-b6f4-977b8fbabfa0.json
deleted file mode 100644
index 31ab8f73038dcae2b4acf85f2e97b8793977020d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenStock-0.5B/72853b4d-cc12-478f-b6f4-977b8fbabfa0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_QwenStock-0.5B/1762652580.198598",
- "retrieved_timestamp": "1762652580.1985989",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/QwenStock-0.5B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "hotmailuser/QwenStock-0.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20490742341431845
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911778102988436
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35753125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11668882978723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenStock-1.7B/25674b98-92b5-4e2d-97ab-084eabb13db2.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenStock-1.7B/25674b98-92b5-4e2d-97ab-084eabb13db2.json
deleted file mode 100644
index ab39ec2ea3e9b667c565149e95e0cb2a4e3e4518..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenStock-1.7B/25674b98-92b5-4e2d-97ab-084eabb13db2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_QwenStock-1.7B/1762652580.1988428",
- "retrieved_timestamp": "1762652580.198844",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/QwenStock-1.7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "hotmailuser/QwenStock-1.7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32141163224688274
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4187550547805281
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09969788519637462
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44121875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2954621010638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenStock1-14B/67fd0572-cf55-412d-8ec6-0cb168d3ed08.json b/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenStock1-14B/67fd0572-cf55-412d-8ec6-0cb168d3ed08.json
deleted file mode 100644
index 602e47ce0561bb622571e94180db7495a1480f13..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/hotmailuser_QwenStock1-14B/67fd0572-cf55-412d-8ec6-0cb168d3ed08.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_QwenStock1-14B/1762652580.1990862",
- "retrieved_timestamp": "1762652580.1990871",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/QwenStock1-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "hotmailuser/QwenStock1-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6693240601603745
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6502248812491821
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37009063444108764
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3859060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47811458333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5416389627659575
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/huihui-ai_DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/69d04754-3779-4408-9aa9-68c9ba65de7a.json b/leaderboard_data/HFOpenLLMv2/alibaba/huihui-ai_DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/69d04754-3779-4408-9aa9-68c9ba65de7a.json
deleted file mode 100644
index 442eb032543ac3ccfb3f94f60850f690f7507c7b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/huihui-ai_DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/69d04754-3779-4408-9aa9-68c9ba65de7a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/huihui-ai_DeepSeek-R1-Distill-Qwen-14B-abliterated-v2/1762652580.200386",
- "retrieved_timestamp": "1762652580.200386",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "huihui-ai/DeepSeek-R1-Distill-Qwen-14B-abliterated-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42112927033604175
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34869240677927044
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22054380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47006250000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19148936170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jayasuryajsk_Qwen2.5-3B-reasoner/91c0e116-7dc0-4931-ac61-b98bac2af3e0.json b/leaderboard_data/HFOpenLLMv2/alibaba/jayasuryajsk_Qwen2.5-3B-reasoner/91c0e116-7dc0-4931-ac61-b98bac2af3e0.json
deleted file mode 100644
index c196f2d7a4c11f38e67f9eacfca5191aeda2cea5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jayasuryajsk_Qwen2.5-3B-reasoner/91c0e116-7dc0-4931-ac61-b98bac2af3e0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jayasuryajsk_Qwen2.5-3B-reasoner/1762652580.280263",
- "retrieved_timestamp": "1762652580.280264",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jayasuryajsk/Qwen2.5-3B-reasoner",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jayasuryajsk/Qwen2.5-3B-reasoner"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4159585455480348
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46511772991620703
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41229166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3482380319148936
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeanmichela_o-distil-qwen/172e7bfa-b430-4e14-a15a-a54ec5c9133e.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeanmichela_o-distil-qwen/172e7bfa-b430-4e14-a15a-a54ec5c9133e.json
deleted file mode 100644
index 3ca4d79d6660a195db49a785a2338354193bda31..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeanmichela_o-distil-qwen/172e7bfa-b430-4e14-a15a-a54ec5c9133e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeanmichela_o-distil-qwen/1762652580.280534",
- "retrieved_timestamp": "1762652580.280535",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeanmichela/o-distil-qwen",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeanmichela/o-distil-qwen"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44823180272787316
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5900367438200601
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5649546827794562
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3934563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5339895833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46575797872340424
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jebish7_qwen2.5-0.5B-IHA-Hin/5849d742-02eb-4370-8c97-efc5eec4f1ed.json b/leaderboard_data/HFOpenLLMv2/alibaba/jebish7_qwen2.5-0.5B-IHA-Hin/5849d742-02eb-4370-8c97-efc5eec4f1ed.json
deleted file mode 100644
index 8df8381b887406c5462ff6d3234559f6ab77ad71..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jebish7_qwen2.5-0.5B-IHA-Hin/5849d742-02eb-4370-8c97-efc5eec4f1ed.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jebish7_qwen2.5-0.5B-IHA-Hin/1762652580.28294",
- "retrieved_timestamp": "1762652580.28294",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jebish7/qwen2.5-0.5B-IHA-Hin",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jebish7/qwen2.5-0.5B-IHA-Hin"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14163419726326149
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29891753632624085
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2525167785234899
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34748958333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.109375
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen-7B-nerd-uncensored-v1.0/1812829e-2c91-410e-9e2e-cc758b652e9b.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen-7B-nerd-uncensored-v1.0/1812829e-2c91-410e-9e2e-cc758b652e9b.json
deleted file mode 100644
index bbbeea6d25a8592d636c087ab1d6a6d34388a6e0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen-7B-nerd-uncensored-v1.0/1812829e-2c91-410e-9e2e-cc758b652e9b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen-7B-nerd-uncensored-v1.0/1762652580.283215",
- "retrieved_timestamp": "1762652580.2832158",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeffmeloy/Qwen-7B-nerd-uncensored-v1.0",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeffmeloy/Qwen-7B-nerd-uncensored-v1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6135952605752737
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5421083753999172
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28700906344410876
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47929166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4362533244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-minperplexity-2/593d3d30-f2e8-4ad3-b0ab-4bfed63a0ab5.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-minperplexity-2/593d3d30-f2e8-4ad3-b0ab-4bfed63a0ab5.json
deleted file mode 100644
index c875f594a33d3d4ea42910af473501c50e5b30d0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-minperplexity-2/593d3d30-f2e8-4ad3-b0ab-4bfed63a0ab5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-minperplexity-2/1762652580.28349",
- "retrieved_timestamp": "1762652580.2834911",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeffmeloy/Qwen2.5-7B-minperplexity-2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeffmeloy/Qwen2.5-7B-minperplexity-2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.509730847484674
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.552390586276348
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3013595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46245833333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4345910904255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v0.9/45a72c39-9cdb-4fb6-aaf0-d50cc89dfd70.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v0.9/45a72c39-9cdb-4fb6-aaf0-d50cc89dfd70.json
deleted file mode 100644
index 188fc3baa8d1a003e56ebf71ce20f2359256267e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v0.9/45a72c39-9cdb-4fb6-aaf0-d50cc89dfd70.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v0.9/1762652580.2837172",
- "retrieved_timestamp": "1762652580.2837179",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v0.9"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6048274134851084
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5469701834138724
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2945619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32298657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48198958333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4363364361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.0/ee2b789c-951d-426e-87e3-232c07d65ade.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.0/ee2b789c-951d-426e-87e3-232c07d65ade.json
deleted file mode 100644
index 74c906439e869957317ff481df18a7550bc9e6b8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.0/ee2b789c-951d-426e-87e3-232c07d65ade.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.0/1762652580.283937",
- "retrieved_timestamp": "1762652580.283938",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7695159953368174
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.541762771903226
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47129909365558914
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4551145833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4253656914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.1/2316b408-c94b-471e-b64b-c1f8f345868e.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.1/2316b408-c94b-471e-b64b-c1f8f345868e.json
deleted file mode 100644
index 50ee1a5bb8c717afa67e85bc286361a640144e27..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.1/2316b408-c94b-471e-b64b-c1f8f345868e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.1/1762652580.2841558",
- "retrieved_timestamp": "1762652580.284157",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6626296005709296
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48640249867140106
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13293051359516617
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38429166666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3849734042553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.2/49d47f6d-0d11-4b07-b42e-b94310c97d3e.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.2/49d47f6d-0d11-4b07-b42e-b94310c97d3e.json
deleted file mode 100644
index 2cc7daf840bac4200f672b27696f9f68add28502..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.2/49d47f6d-0d11-4b07-b42e-b94310c97d3e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.2/1762652580.284375",
- "retrieved_timestamp": "1762652580.284375",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49646715160219335
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.494592979290867
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41724999999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3968583776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.3/0ec990b0-b908-44f5-9fb7-5ee603737bc7.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.3/0ec990b0-b908-44f5-9fb7-5ee603737bc7.json
deleted file mode 100644
index 76032093d657af947693785aa13273b00b8e962d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.3/0ec990b0-b908-44f5-9fb7-5ee603737bc7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.3/1762652580.284589",
- "retrieved_timestamp": "1762652580.284589",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49951462120506923
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5026055485090198
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12311178247734139
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41873958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4015957446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.4/34c33a97-ae07-42e9-8025-9076e2bce3bb.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.4/34c33a97-ae07-42e9-8025-9076e2bce3bb.json
deleted file mode 100644
index 05a4f1ee29c43947873a3c1b49db530b83bbcf88..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.4/34c33a97-ae07-42e9-8025-9076e2bce3bb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.4/1762652580.284807",
- "retrieved_timestamp": "1762652580.284807",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6078748830879843
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5467076263362468
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2809667673716012
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47138541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44190492021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.5/bd4ff159-0bf9-4fe1-8cc8-9f3d7bb47bbc.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.5/bd4ff159-0bf9-4fe1-8cc8-9f3d7bb47bbc.json
deleted file mode 100644
index 1d0e7896eddf638c8296c6c5058d0be0453d8649..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.5/bd4ff159-0bf9-4fe1-8cc8-9f3d7bb47bbc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.5/1762652580.2850199",
- "retrieved_timestamp": "1762652580.2850208",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5650352176669016
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5522599149696679
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2756797583081571
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3271812080536913
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49820833333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44481382978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.7/4aa966fc-ee99-430c-8688-99565f5e6fcc.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.7/4aa966fc-ee99-430c-8688-99565f5e6fcc.json
deleted file mode 100644
index c8a4157eee60f6da337e0283fae31bb04db3bb46..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.7/4aa966fc-ee99-430c-8688-99565f5e6fcc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.7/1762652580.285239",
- "retrieved_timestamp": "1762652580.285239",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.7"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4201551882338861
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5391718355132782
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29154078549848944
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48484375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42802526595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.8/e908901d-c122-4458-9d4e-9a7d1242211c.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.8/e908901d-c122-4458-9d4e-9a7d1242211c.json
deleted file mode 100644
index 0b8cd3b99eb8df166f9d25d516855e4d781535b0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.8/e908901d-c122-4458-9d4e-9a7d1242211c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-nerd-uncensored-v1.8/1762652580.2854452",
- "retrieved_timestamp": "1762652580.285446",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeffmeloy/Qwen2.5-7B-nerd-uncensored-v1.8"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6255601803215468
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5446899383425835
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.270392749244713
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47671875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4343417553191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.0/e9350de5-cae6-46bc-a83f-0e6e65eae4e3.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.0/e9350de5-cae6-46bc-a83f-0e6e65eae4e3.json
deleted file mode 100644
index eaf7786d9bf99bdad050033ce6f9015f35428512..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.0/e9350de5-cae6-46bc-a83f-0e6e65eae4e3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.0/1762652580.285652",
- "retrieved_timestamp": "1762652580.2856529",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeffmeloy/Qwen2.5-7B-olm-v1.0",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeffmeloy/Qwen2.5-7B-olm-v1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5331365222055258
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5659918212629057
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2862537764350453
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42776041666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4566156914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.1/769eabf2-4c12-4a48-8ec2-7dacf50a28f0.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.1/769eabf2-4c12-4a48-8ec2-7dacf50a28f0.json
deleted file mode 100644
index af46d71bb41bf4455a026052845a7842aeb2d1a7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.1/769eabf2-4c12-4a48-8ec2-7dacf50a28f0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.1/1762652580.285865",
- "retrieved_timestamp": "1762652580.285865",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeffmeloy/Qwen2.5-7B-olm-v1.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeffmeloy/Qwen2.5-7B-olm-v1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4329445870290828
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5478077656573704
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38293051359516617
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48081250000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4354222074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.2/8c4531a4-4418-4090-9c82-f60bcf8d9935.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.2/8c4531a4-4418-4090-9c82-f60bcf8d9935.json
deleted file mode 100644
index b35d6d1df11e3b8ac82921f54a9ba834892cffa9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.2/8c4531a4-4418-4090-9c82-f60bcf8d9935.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.2/1762652580.286082",
- "retrieved_timestamp": "1762652580.286083",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeffmeloy/Qwen2.5-7B-olm-v1.2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeffmeloy/Qwen2.5-7B-olm-v1.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42025492360270744
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5533340429711561
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2847432024169184
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31711409395973156
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46878125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4387466755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.3/a5c9246f-a7b5-4183-9a64-93151b536945.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.3/a5c9246f-a7b5-4183-9a64-93151b536945.json
deleted file mode 100644
index ebafcd423c6f5e20d0e2773f04a719fa15d910f0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.3/a5c9246f-a7b5-4183-9a64-93151b536945.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.3/1762652580.286303",
- "retrieved_timestamp": "1762652580.286304",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeffmeloy/Qwen2.5-7B-olm-v1.3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeffmeloy/Qwen2.5-7B-olm-v1.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4218540140161438
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5531852688351706
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104229607250755
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4700520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44697473404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.4/1faf58ba-28e7-45a1-bc2c-d0aa707a49aa.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.4/1faf58ba-28e7-45a1-bc2c-d0aa707a49aa.json
deleted file mode 100644
index 8554a40d07a04e14cd4c9a95e5f5a237692f5708..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.4/1faf58ba-28e7-45a1-bc2c-d0aa707a49aa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.4/1762652580.286527",
- "retrieved_timestamp": "1762652580.2865438",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeffmeloy/Qwen2.5-7B-olm-v1.4",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeffmeloy/Qwen2.5-7B-olm-v1.4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4545018329144448
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5581962445576828
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29229607250755285
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46220833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4457280585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.5/b347eea5-e676-478e-b0ee-d53abf2c8697.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.5/b347eea5-e676-478e-b0ee-d53abf2c8697.json
deleted file mode 100644
index b768d92651f0dcb6aef398fd0d4e34fcbf5a7122..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_Qwen2.5-7B-olm-v1.5/b347eea5-e676-478e-b0ee-d53abf2c8697.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeffmeloy_Qwen2.5-7B-olm-v1.5/1762652580.286995",
- "retrieved_timestamp": "1762652580.286996",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeffmeloy/Qwen2.5-7B-olm-v1.5",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeffmeloy/Qwen2.5-7B-olm-v1.5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4546514359676769
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5543943528577703
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28172205438066467
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33976510067114096
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4539270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43991023936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_jeffmeloy_Qwen2.5-7B-minperplexity-1/ba005ac7-761f-4cd7-91ed-34b88028240f.json b/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_jeffmeloy_Qwen2.5-7B-minperplexity-1/ba005ac7-761f-4cd7-91ed-34b88028240f.json
deleted file mode 100644
index cba0021742962828b4a45f76ad09e41cca047ee3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/jeffmeloy_jeffmeloy_Qwen2.5-7B-minperplexity-1/ba005ac7-761f-4cd7-91ed-34b88028240f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jeffmeloy_jeffmeloy_Qwen2.5-7B-minperplexity-1/1762652580.2872581",
- "retrieved_timestamp": "1762652580.2872589",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "jeffmeloy/jeffmeloy_Qwen2.5-7B-minperplexity-1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37571643239936703
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5582354546195324
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29154078549848944
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33221476510067116
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42903125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4367519946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/kayfour_T3Q-Qwen2.5-7B-it-KOR-Safe/35e56ec7-deae-4674-abfc-3c45f5dec040.json b/leaderboard_data/HFOpenLLMv2/alibaba/kayfour_T3Q-Qwen2.5-7B-it-KOR-Safe/35e56ec7-deae-4674-abfc-3c45f5dec040.json
deleted file mode 100644
index 3c6db091a426c466a0232c5e90a146ac98636798..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/kayfour_T3Q-Qwen2.5-7B-it-KOR-Safe/35e56ec7-deae-4674-abfc-3c45f5dec040.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/kayfour_T3Q-Qwen2.5-7B-it-KOR-Safe/1762652580.3057542",
- "retrieved_timestamp": "1762652580.305755",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "kayfour/T3Q-Qwen2.5-7B-it-KOR-Safe"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6081497094376255
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5549941776226351
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37613293051359514
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42772916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44639295212765956
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/kms7530_chemeng_qwen-math-7b_24_1_100_1/af7f201f-3af3-4ffb-9416-c83235851cb6.json b/leaderboard_data/HFOpenLLMv2/alibaba/kms7530_chemeng_qwen-math-7b_24_1_100_1/af7f201f-3af3-4ffb-9416-c83235851cb6.json
deleted file mode 100644
index 02c9067aece085852d29101a105ea9010eb80ed8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/kms7530_chemeng_qwen-math-7b_24_1_100_1/af7f201f-3af3-4ffb-9416-c83235851cb6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/kms7530_chemeng_qwen-math-7b_24_1_100_1/1762652580.310198",
- "retrieved_timestamp": "1762652580.310199",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "kms7530/chemeng_qwen-math-7b_24_1_100_1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "kms7530/chemeng_qwen-math-7b_24_1_100_1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.211052230304481
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3578007894497858
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2243202416918429
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24412751677852348
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3686979166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21584109042553193
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "?",
- "params_billions": 8.911
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/kms7530_chemeng_qwen-math-7b_24_1_100_1_nonmath/8ae7c857-be7e-463e-86c2-6b165920a45c.json b/leaderboard_data/HFOpenLLMv2/alibaba/kms7530_chemeng_qwen-math-7b_24_1_100_1_nonmath/8ae7c857-be7e-463e-86c2-6b165920a45c.json
deleted file mode 100644
index 465479cffaca0afb4969d635252d705889f485ce..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/kms7530_chemeng_qwen-math-7b_24_1_100_1_nonmath/8ae7c857-be7e-463e-86c2-6b165920a45c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/kms7530_chemeng_qwen-math-7b_24_1_100_1_nonmath/1762652580.310462",
- "retrieved_timestamp": "1762652580.310463",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "kms7530/chemeng_qwen-math-7b_24_1_100_1_nonmath"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25836336476105626
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3892856967853256
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30966767371601206
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40869791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24517952127659576
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "?",
- "params_billions": 15.231
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_212_QwenLawLo/c4f888d2-c08c-43c4-a1f9-79edf519c893.json b/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_212_QwenLawLo/c4f888d2-c08c-43c4-a1f9-79edf519c893.json
deleted file mode 100644
index 6212f6dc0218e0b6e0a51fb8196281f745e713e8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_212_QwenLawLo/c4f888d2-c08c-43c4-a1f9-79edf519c893.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_212_QwenLawLo/1762652580.322983",
- "retrieved_timestamp": "1762652580.322984",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lkoenig/BBAI_212_QwenLawLo",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "lkoenig/BBAI_212_QwenLawLo"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4566250880995758
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5574113357405873
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3602719033232628
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43696874999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44888630319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_212_Qwencore/d42a520c-15dd-4497-a26a-b6f77b3257e6.json b/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_212_Qwencore/d42a520c-15dd-4497-a26a-b6f77b3257e6.json
deleted file mode 100644
index f5d94f72156803249b68f1926a1d22525d07e54c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_212_Qwencore/d42a520c-15dd-4497-a26a-b6f77b3257e6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_212_Qwencore/1762652580.3232372",
- "retrieved_timestamp": "1762652580.323238",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lkoenig/BBAI_212_Qwencore",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "lkoenig/BBAI_212_Qwencore"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4384400058511416
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.556868234536878
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34894259818731116
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4343333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.448969414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_230_Xiaqwen/c9393ea7-3269-435f-9159-95638b9c691e.json b/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_230_Xiaqwen/c9393ea7-3269-435f-9159-95638b9c691e.json
deleted file mode 100644
index de42fa574556c460b5a17f6b6548e19efed6efab..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_230_Xiaqwen/c9393ea7-3269-435f-9159-95638b9c691e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_230_Xiaqwen/1762652580.3234491",
- "retrieved_timestamp": "1762652580.32345",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lkoenig/BBAI_230_Xiaqwen",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "lkoenig/BBAI_230_Xiaqwen"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4648931501748693
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.557779565750489
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36631419939577037
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4422083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4480551861702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_375_QwenDyancabs/08e49740-3cdd-47b2-9b95-b96d8a13dd79.json b/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_375_QwenDyancabs/08e49740-3cdd-47b2-9b95-b96d8a13dd79.json
deleted file mode 100644
index 68a2b376196d0bfb964743fa3656ce58e38f1cb9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_375_QwenDyancabs/08e49740-3cdd-47b2-9b95-b96d8a13dd79.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_375_QwenDyancabs/1762652580.323661",
- "retrieved_timestamp": "1762652580.323662",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lkoenig/BBAI_375_QwenDyancabs",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "lkoenig/BBAI_375_QwenDyancabs"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4565752204151651
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5571383122938682
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.377643504531722
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44617708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4476396276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_456_QwenKoen/249b0b65-5c71-4c5d-9802-28df0ead0cdf.json b/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_456_QwenKoen/249b0b65-5c71-4c5d-9802-28df0ead0cdf.json
deleted file mode 100644
index 2f2d1e8b7c820a7fd8983e1f664a15d18b1073b1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_456_QwenKoen/249b0b65-5c71-4c5d-9802-28df0ead0cdf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_456_QwenKoen/1762652580.323869",
- "retrieved_timestamp": "1762652580.323869",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lkoenig/BBAI_456_QwenKoen",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "lkoenig/BBAI_456_QwenKoen"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45292823042859615
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5552713612233481
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3685800604229607
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4395104166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4468916223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_KoenQwenDyan/fe084d09-ee80-4c7f-93a7-3ee0f9081177.json b/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_KoenQwenDyan/fe084d09-ee80-4c7f-93a7-3ee0f9081177.json
deleted file mode 100644
index 1b8d76394474ea8bbd5d8c6de1d55c9d4f47e061..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_KoenQwenDyan/fe084d09-ee80-4c7f-93a7-3ee0f9081177.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_KoenQwenDyan/1762652580.324076",
- "retrieved_timestamp": "1762652580.3240771",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lkoenig/BBAI_7B_KoenQwenDyan",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "lkoenig/BBAI_7B_KoenQwenDyan"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5807224830117421
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5536566841353078
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37386706948640486
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179530201342282
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43687499999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44597739361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_Qwen2.5koen/078cedea-7b3a-4c77-b932-3d42f0c841fe.json b/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_Qwen2.5koen/078cedea-7b3a-4c77-b932-3d42f0c841fe.json
deleted file mode 100644
index cb083261a0289756a30307350f5fcedef18874b5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_Qwen2.5koen/078cedea-7b3a-4c77-b932-3d42f0c841fe.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_Qwen2.5koen/1762652580.324276",
- "retrieved_timestamp": "1762652580.324277",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lkoenig/BBAI_7B_Qwen2.5koen",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "lkoenig/BBAI_7B_Qwen2.5koen"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45999725173650363
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5544031312134464
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36555891238670696
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43690625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4484707446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_QwenDyanKoenLo/dedc34ed-fd8f-4b29-b898-3c9830993247.json b/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_QwenDyanKoenLo/dedc34ed-fd8f-4b29-b898-3c9830993247.json
deleted file mode 100644
index 1cb7e6f8344f31a49a8ce206ecc63c56a299f343..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_QwenDyanKoenLo/dedc34ed-fd8f-4b29-b898-3c9830993247.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_QwenDyanKoenLo/1762652580.324512",
- "retrieved_timestamp": "1762652580.324513",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lkoenig/BBAI_7B_QwenDyanKoenLo",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "lkoenig/BBAI_7B_QwenDyanKoenLo"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46631714960748594
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5562461525503201
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3640483383685801
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3187919463087248
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4343020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4464760638297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_QwenDyancabsLAW/05f391f3-68ac-422a-b7e8-01eba1729a0b.json b/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_QwenDyancabsLAW/05f391f3-68ac-422a-b7e8-01eba1729a0b.json
deleted file mode 100644
index f21cabfb35e1d6a79c5e1cdc43f71214a2b7837f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/lkoenig_BBAI_7B_QwenDyancabsLAW/05f391f3-68ac-422a-b7e8-01eba1729a0b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_7B_QwenDyancabsLAW/1762652580.3247318",
- "retrieved_timestamp": "1762652580.3247318",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lkoenig/BBAI_7B_QwenDyancabsLAW",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "lkoenig/BBAI_7B_QwenDyancabsLAW"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5549685944405289
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5578836606885887
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3678247734138973
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3187919463087248
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4461145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4471409574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/macadeliccc_Samantha-Qwen-2-7B/c443492e-3b5f-4394-9fbb-761dba338638.json b/leaderboard_data/HFOpenLLMv2/alibaba/macadeliccc_Samantha-Qwen-2-7B/c443492e-3b5f-4394-9fbb-761dba338638.json
deleted file mode 100644
index 4cc97048dbf7f222b96d98be74fc332b3e572500..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/macadeliccc_Samantha-Qwen-2-7B/c443492e-3b5f-4394-9fbb-761dba338638.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/macadeliccc_Samantha-Qwen-2-7B/1762652580.3290062",
- "retrieved_timestamp": "1762652580.3290062",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "macadeliccc/Samantha-Qwen-2-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "macadeliccc/Samantha-Qwen-2-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4377152621710395
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5082341412476951
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21148036253776434
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4799479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3779089095744681
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.1/19b72caf-a841-4928-98c3-c505694724c3.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.1/19b72caf-a841-4928-98c3-c505694724c3.json
deleted file mode 100644
index a2f51c6d66b0f425dbb584085b191955259a16d4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.1/19b72caf-a841-4928-98c3-c505694724c3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.1/1762652580.333172",
- "retrieved_timestamp": "1762652580.333172",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7444868504457063
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.555919540267728
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4652567975830816
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4073333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.429936835106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.3/36b2821f-5fa6-4384-9ddc-6cbc5b52321c.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.3/36b2821f-5fa6-4384-9ddc-6cbc5b52321c.json
deleted file mode 100644
index 187ceb497b30b22b3182bd139360bf5d3a54c5ff..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.3/36b2821f-5fa6-4384-9ddc-6cbc5b52321c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-MST-v1.3/1762652580.333376",
- "retrieved_timestamp": "1762652580.3333771",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST-v1.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.704320092909037
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5516165586639877
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47583081570996977
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145973154362416
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43105208333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44398271276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-MST/80d3a785-dde1-44fa-b6e1-93722849fdb1.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-MST/80d3a785-dde1-44fa-b6e1-93722849fdb1.json
deleted file mode 100644
index 5214db02a37f881360138ef5a116acb0f1d5e0ee..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-MST/80d3a785-dde1-44fa-b6e1-93722849fdb1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-MST/1762652580.332918",
- "retrieved_timestamp": "1762652580.3329191",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "marcuscedricridia/Hush-Qwen2.5-7B-MST",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "marcuscedricridia/Hush-Qwen2.5-7B-MST"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7488330961847898
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5458495423775734
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4244712990936556
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3913645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41630651595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-Preview/6bfc8cf9-e615-4447-bc6e-ff96752dc5fb.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-Preview/6bfc8cf9-e615-4447-bc6e-ff96752dc5fb.json
deleted file mode 100644
index 81c9ade0e28c1ed0bb70d02f9e0a281d89e03794..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-Preview/6bfc8cf9-e615-4447-bc6e-ff96752dc5fb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-Preview/1762652580.333591",
- "retrieved_timestamp": "1762652580.3335922",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "marcuscedricridia/Hush-Qwen2.5-7B-Preview",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "marcuscedricridia/Hush-Qwen2.5-7B-Preview"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7962439660101863
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5431064770878757
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37537764350453173
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4298125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43641954787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-RP-v1.4-1M/feefc068-9257-4d0f-ac55-acd08ededeca.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-RP-v1.4-1M/feefc068-9257-4d0f-ac55-acd08ededeca.json
deleted file mode 100644
index b1be1b278b0bbdcc65a58622cbd334aa6a1c05b2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-RP-v1.4-1M/feefc068-9257-4d0f-ac55-acd08ededeca.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-RP-v1.4-1M/1762652580.333802",
- "retrieved_timestamp": "1762652580.333802",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "marcuscedricridia/Hush-Qwen2.5-7B-RP-v1.4-1M"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7727884236049238
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5295123017150106
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3368580060422961
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44327083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4134807180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.1/25d6c4bd-6540-43cb-a682-77d4fa4eb64e.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.1/25d6c4bd-6540-43cb-a682-77d4fa4eb64e.json
deleted file mode 100644
index 39aedf392c7cc6c84c554559c1e602235ef565a1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.1/25d6c4bd-6540-43cb-a682-77d4fa4eb64e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.1/1762652580.334015",
- "retrieved_timestamp": "1762652580.334016",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7889499860370484
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5383575636307666
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4380664652567976
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4179375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4227061170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.2/6e342711-8d2d-42ed-a019-11be429e10d8.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.2/6e342711-8d2d-42ed-a019-11be429e10d8.json
deleted file mode 100644
index 937c6a72e90d927a3900845bcdbad6a6a97a5925..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.2/6e342711-8d2d-42ed-a019-11be429e10d8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.2/1762652580.334213",
- "retrieved_timestamp": "1762652580.334214",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7865020368178655
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.540250407222091
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44033232628398794
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145973154362416
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.421875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4197140957446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.3/1af605c0-ec58-4651-a57a-2fd7d0cd5a67.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.3/1af605c0-ec58-4651-a57a-2fd7d0cd5a67.json
deleted file mode 100644
index 3a2036c672ad1fdfc3fa51140624850c72aa01e4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.3/1af605c0-ec58-4651-a57a-2fd7d0cd5a67.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.3/1762652580.334473",
- "retrieved_timestamp": "1762652580.334474",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7856276900845313
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5326893189699237
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3323262839879154
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42463541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43450797872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.4/fd65e319-bc38-457b-9913-9a2214e69823.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.4/fd65e319-bc38-457b-9913-9a2214e69823.json
deleted file mode 100644
index e687f5eab45d255345b8253bcacb37d2386b505c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Hush-Qwen2.5-7B-v1.4/fd65e319-bc38-457b-9913-9a2214e69823.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/marcuscedricridia_Hush-Qwen2.5-7B-v1.4/1762652580.334734",
- "retrieved_timestamp": "1762652580.3347352",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "marcuscedricridia/Hush-Qwen2.5-7B-v1.4",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "marcuscedricridia/Hush-Qwen2.5-7B-v1.4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7834545672149895
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.54229983590397
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4259818731117825
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4231770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4195478723404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Qwen2.5-7B-Preview/56032f8a-b733-4b1f-acbc-78d0d1ddf2a5.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Qwen2.5-7B-Preview/56032f8a-b733-4b1f-acbc-78d0d1ddf2a5.json
deleted file mode 100644
index 29d967667462660341a43ba40dbf71d5c0b89aa2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Qwen2.5-7B-Preview/56032f8a-b733-4b1f-acbc-78d0d1ddf2a5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/marcuscedricridia_Qwen2.5-7B-Preview/1762652580.334959",
- "retrieved_timestamp": "1762652580.334959",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "marcuscedricridia/Qwen2.5-7B-Preview",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "marcuscedricridia/Qwen2.5-7B-Preview"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7679423928509688
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5359781834039953
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34441087613293053
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41403125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42578125
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Yell-Qwen2.5-7B-Preview-v1.1/be0058b1-23b2-40b7-b336-ab40bf82c997.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Yell-Qwen2.5-7B-Preview-v1.1/be0058b1-23b2-40b7-b336-ab40bf82c997.json
deleted file mode 100644
index 4d7920fac0775b5c1f2f3fffdcdb350d380b3c46..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Yell-Qwen2.5-7B-Preview-v1.1/be0058b1-23b2-40b7-b336-ab40bf82c997.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/marcuscedricridia_Yell-Qwen2.5-7B-Preview-v1.1/1762652580.335416",
- "retrieved_timestamp": "1762652580.335417",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "marcuscedricridia/Yell-Qwen2.5-7B-Preview-v1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5757013612769672
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5347734083768815
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18957703927492447
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2860738255033557
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4059375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38314494680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Yell-Qwen2.5-7B-Preview/f47334f2-f0ab-48f5-814e-f3ede36802d9.json b/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Yell-Qwen2.5-7B-Preview/f47334f2-f0ab-48f5-814e-f3ede36802d9.json
deleted file mode 100644
index de5d1157f3a148da79e37f3e45b6f7bb5aed36a2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/marcuscedricridia_Yell-Qwen2.5-7B-Preview/f47334f2-f0ab-48f5-814e-f3ede36802d9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/marcuscedricridia_Yell-Qwen2.5-7B-Preview/1762652580.335188",
- "retrieved_timestamp": "1762652580.335188",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "marcuscedricridia/Yell-Qwen2.5-7B-Preview",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "marcuscedricridia/Yell-Qwen2.5-7B-Preview"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5838696879834395
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.537136379549371
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19259818731117825
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40463541666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37982047872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/maywell_Qwen2-7B-Multilingual-RP/fd91f8aa-a521-4e9b-824a-aa21adade569.json b/leaderboard_data/HFOpenLLMv2/alibaba/maywell_Qwen2-7B-Multilingual-RP/fd91f8aa-a521-4e9b-824a-aa21adade569.json
deleted file mode 100644
index 9bbede6a50bbe28ddbf4efbfdeb5965877639638..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/maywell_Qwen2-7B-Multilingual-RP/fd91f8aa-a521-4e9b-824a-aa21adade569.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/maywell_Qwen2-7B-Multilingual-RP/1762652580.342533",
- "retrieved_timestamp": "1762652580.3425338",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "maywell/Qwen2-7B-Multilingual-RP",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "maywell/Qwen2-7B-Multilingual-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4347176602525743
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5062058680861069
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2243202416918429
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3695625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3858876329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/mergekit-community_SuperQwen-2.5-1.5B/95d33475-a71b-41d6-a08d-3da30e631897.json b/leaderboard_data/HFOpenLLMv2/alibaba/mergekit-community_SuperQwen-2.5-1.5B/95d33475-a71b-41d6-a08d-3da30e631897.json
deleted file mode 100644
index 1e8c944c05d6ccb2b0b12292dad4778e86dafd68..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/mergekit-community_SuperQwen-2.5-1.5B/95d33475-a71b-41d6-a08d-3da30e631897.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/mergekit-community_SuperQwen-2.5-1.5B/1762652580.346312",
- "retrieved_timestamp": "1762652580.346313",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "mergekit-community/SuperQwen-2.5-1.5B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "mergekit-community/SuperQwen-2.5-1.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1336409615376091
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2906897601443365
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.019637462235649546
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25419463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3355208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10746343085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/mhl1_Qwen2.5-0.5B-cinstruct-stage1/bf9d8219-66b9-4c77-8c6d-2983e60dc2cb.json b/leaderboard_data/HFOpenLLMv2/alibaba/mhl1_Qwen2.5-0.5B-cinstruct-stage1/bf9d8219-66b9-4c77-8c6d-2983e60dc2cb.json
deleted file mode 100644
index e854fd70171a11618d42c6c28532ee9b7cd452e6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/mhl1_Qwen2.5-0.5B-cinstruct-stage1/bf9d8219-66b9-4c77-8c6d-2983e60dc2cb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/mhl1_Qwen2.5-0.5B-cinstruct-stage1/1762652580.3535528",
- "retrieved_timestamp": "1762652580.353554",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "mhl1/Qwen2.5-0.5B-cinstruct-stage1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "mhl1/Qwen2.5-0.5B-cinstruct-stage1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14817905379947427
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32557832478283544
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01283987915407855
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35003125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11394614361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/migtissera_Tess-v2.5.2-Qwen2-72B/34b9dd9e-dc03-4354-b016-3b1463a902f9.json b/leaderboard_data/HFOpenLLMv2/alibaba/migtissera_Tess-v2.5.2-Qwen2-72B/34b9dd9e-dc03-4354-b016-3b1463a902f9.json
deleted file mode 100644
index a5fdfff157fee40c9ae6f9fe9594ba9a5e615d2d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/migtissera_Tess-v2.5.2-Qwen2-72B/34b9dd9e-dc03-4354-b016-3b1463a902f9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/migtissera_Tess-v2.5.2-Qwen2-72B/1762652580.359263",
- "retrieved_timestamp": "1762652580.359264",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "migtissera/Tess-v2.5.2-Qwen2-72B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "migtissera/Tess-v2.5.2-Qwen2-72B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44943084349525925
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6646791891060648
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2938066465256798
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35067114093959734
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41883333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5561003989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/minghaowu_Qwen1.5-1.8B-OpenHermes-2.5/cf3f376a-92ec-4678-a57a-cee2e40032a5.json b/leaderboard_data/HFOpenLLMv2/alibaba/minghaowu_Qwen1.5-1.8B-OpenHermes-2.5/cf3f376a-92ec-4678-a57a-cee2e40032a5.json
deleted file mode 100644
index ec0299a7859b18fc8f9a3100d884a2e1bc190deb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/minghaowu_Qwen1.5-1.8B-OpenHermes-2.5/cf3f376a-92ec-4678-a57a-cee2e40032a5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/minghaowu_Qwen1.5-1.8B-OpenHermes-2.5/1762652580.360414",
- "retrieved_timestamp": "1762652580.360415",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "minghaowu/Qwen1.5-1.8B-OpenHermes-2.5",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "minghaowu/Qwen1.5-1.8B-OpenHermes-2.5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27779735546128714
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33746396801266015
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02416918429003021
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3528854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17918882978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.837
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/mobiuslabsgmbh_DeepSeek-R1-ReDistill-Qwen-7B-v1.1/99d27765-a9c5-4f50-8bd1-c3ce67683621.json b/leaderboard_data/HFOpenLLMv2/alibaba/mobiuslabsgmbh_DeepSeek-R1-ReDistill-Qwen-7B-v1.1/99d27765-a9c5-4f50-8bd1-c3ce67683621.json
deleted file mode 100644
index ebd0d0ea1370c1e506f2326d8a2ce2bb66d71d7b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/mobiuslabsgmbh_DeepSeek-R1-ReDistill-Qwen-7B-v1.1/99d27765-a9c5-4f50-8bd1-c3ce67683621.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/mobiuslabsgmbh_DeepSeek-R1-ReDistill-Qwen-7B-v1.1/1762652580.371459",
- "retrieved_timestamp": "1762652580.3714602",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "mobiuslabsgmbh/DeepSeek-R1-ReDistill-Qwen-7B-v1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34731512387132807
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36983762765044165
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3496978851963746
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40088541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23262965425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-1.5B/f2eaeee8-a75b-4d0f-9dcd-2a11c3de926b.json b/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-1.5B/f2eaeee8-a75b-4d0f-9dcd-2a11c3de926b.json
deleted file mode 100644
index e03df7d08c3937224e5487266eb9ddf266dd6228..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-1.5B/f2eaeee8-a75b-4d0f-9dcd-2a11c3de926b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-1.5B/1762652580.377223",
- "retrieved_timestamp": "1762652580.377223",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "nbeerbower/Dumpling-Qwen2.5-1.5B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "nbeerbower/Dumpling-Qwen2.5-1.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3698963195432563
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4159743091354106
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11706948640483383
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37276041666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2771775265957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-14B/0a70cdb4-5ccc-40e2-bf99-3af619b8b7f6.json b/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-14B/0a70cdb4-5ccc-40e2-bf99-3af619b8b7f6.json
deleted file mode 100644
index d6d272fcfb07907a9b8bb7c91a9006c6c4c7fb2f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-14B/0a70cdb4-5ccc-40e2-bf99-3af619b8b7f6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-14B/1762652580.3774788",
- "retrieved_timestamp": "1762652580.37748",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "nbeerbower/Dumpling-Qwen2.5-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "nbeerbower/Dumpling-Qwen2.5-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6064010159709571
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6450644262798378
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30966767371601206
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43539583333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5170378989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-7B-1k-r16/76e3f2a5-7545-4270-800d-6413e39608ad.json b/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-7B-1k-r16/76e3f2a5-7545-4270-800d-6413e39608ad.json
deleted file mode 100644
index 82481c2e5742262e2213e437b4d28fe995ee1056..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-7B-1k-r16/76e3f2a5-7545-4270-800d-6413e39608ad.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-7B-1k-r16/1762652580.3776908",
- "retrieved_timestamp": "1762652580.377692",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r16",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r16"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4860004787297703
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5214228032573378
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.236404833836858
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4229895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39586103723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-7B-1k-r64-2e-5/2e6c1c46-01af-493a-a2ce-266d13b53000.json b/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-7B-1k-r64-2e-5/2e6c1c46-01af-493a-a2ce-266d13b53000.json
deleted file mode 100644
index 2fc0fe24c055353299ab1db45082feafa83917ff..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Dumpling-Qwen2.5-7B-1k-r64-2e-5/2e6c1c46-01af-493a-a2ce-266d13b53000.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/nbeerbower_Dumpling-Qwen2.5-7B-1k-r64-2e-5/1762652580.377894",
- "retrieved_timestamp": "1762652580.377894",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "nbeerbower/Dumpling-Qwen2.5-7B-1k-r64-2e-5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.417906709752346
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5300548108450988
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21148036253776434
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4486041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41215093085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_EVA-abliterated-TIES-Qwen2.5-1.5B/dea423e8-cdbd-4895-80af-f53dbb5caa1c.json b/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_EVA-abliterated-TIES-Qwen2.5-1.5B/dea423e8-cdbd-4895-80af-f53dbb5caa1c.json
deleted file mode 100644
index 979ea7c06a189234e0878791b385f8952d1e7d80..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_EVA-abliterated-TIES-Qwen2.5-1.5B/dea423e8-cdbd-4895-80af-f53dbb5caa1c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/nbeerbower_EVA-abliterated-TIES-Qwen2.5-1.5B/1762652580.378096",
- "retrieved_timestamp": "1762652580.3780968",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-1.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41148707651254224
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39965589836197535
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13746223564954682
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35018750000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27119348404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_EVA-abliterated-TIES-Qwen2.5-14B/997fc8c5-fc91-4e9e-a2b7-bdda77e4f4a7.json b/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_EVA-abliterated-TIES-Qwen2.5-14B/997fc8c5-fc91-4e9e-a2b7-bdda77e4f4a7.json
deleted file mode 100644
index b0473b64607e855f4ac6a37f3f9ce73174ab44d1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_EVA-abliterated-TIES-Qwen2.5-14B/997fc8c5-fc91-4e9e-a2b7-bdda77e4f4a7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/nbeerbower_EVA-abliterated-TIES-Qwen2.5-14B/1762652580.378304",
- "retrieved_timestamp": "1762652580.378304",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "nbeerbower/EVA-abliterated-TIES-Qwen2.5-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.783554302583811
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6372016353633118
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5045317220543807
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3548657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4406666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5211103723404256
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Qwen2.5-Gutenberg-Doppel-14B/649483fb-4b54-4824-82eb-e78e55e53912.json b/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Qwen2.5-Gutenberg-Doppel-14B/649483fb-4b54-4824-82eb-e78e55e53912.json
deleted file mode 100644
index e7fb6674d4bcfa7d92b24abe83ee775a87fdc043..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/nbeerbower_Qwen2.5-Gutenberg-Doppel-14B/649483fb-4b54-4824-82eb-e78e55e53912.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/nbeerbower_Qwen2.5-Gutenberg-Doppel-14B/1762652580.38376",
- "retrieved_timestamp": "1762652580.38376",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "nbeerbower/Qwen2.5-Gutenberg-Doppel-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "nbeerbower/Qwen2.5-Gutenberg-Doppel-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8090832324897937
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6381735755183319
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5415407854984894
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33305369127516776
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4100625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49210438829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/0d99e863-596f-43b7-932e-a4a27435e63d.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/0d99e863-596f-43b7-932e-a4a27435e63d.json
deleted file mode 100644
index fda4d8a3fff333b451d2fe7423fa663e49ef1243..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/0d99e863-596f-43b7-932e-a4a27435e63d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/netcat420_DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b/1762652580.391702",
- "retrieved_timestamp": "1762652580.3917031",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "netcat420/DeepSeek-R1-Distill-Qwen-MFANN-Slerp-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11500596195871399
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28767781029884354
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0015105740181268882
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3723854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10895944148936171
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/399b43e8-3c07-4f3d-8b3e-50b8acd96e78.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/399b43e8-3c07-4f3d-8b3e-50b8acd96e78.json
deleted file mode 100644
index 9b126a9169afd8a9e20d7fae23126e9ca1c017f6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/399b43e8-3c07-4f3d-8b3e-50b8acd96e78.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-7B-nerd-uncensored-v0.9-MFANN/1762652580.400365",
- "retrieved_timestamp": "1762652580.400365",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "netcat420/Qwen2.5-7B-nerd-uncensored-v0.9-MFANN"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5878413720040603
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5236664966992856
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3376132930513595
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39257291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.390375664893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-7b-MFANN-slerp/d621c163-5ca6-4e54-8913-d931e4a2c6b9.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-7b-MFANN-slerp/d621c163-5ca6-4e54-8913-d931e4a2c6b9.json
deleted file mode 100644
index a7485b3da014c082981dd677fd7ed343d62d6f67..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-7b-MFANN-slerp/d621c163-5ca6-4e54-8913-d931e4a2c6b9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-7b-MFANN-slerp/1762652580.4005811",
- "retrieved_timestamp": "1762652580.4005818",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "netcat420/Qwen2.5-7b-MFANN-slerp",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "netcat420/Qwen2.5-7b-MFANN-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6532123654126606
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5088729928004616
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28700906344410876
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40730208333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3416722074468085
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-7b-nerd-uncensored-MFANN-slerp/170aa8c2-6b80-44d3-9d22-c1a5f7fa2ad4.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-7b-nerd-uncensored-MFANN-slerp/170aa8c2-6b80-44d3-9d22-c1a5f7fa2ad4.json
deleted file mode 100644
index 05ceac128f17d595ea8003705fe090d581cae3f2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-7b-nerd-uncensored-MFANN-slerp/170aa8c2-6b80-44d3-9d22-c1a5f7fa2ad4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-7b-nerd-uncensored-MFANN-slerp/1762652580.4007921",
- "retrieved_timestamp": "1762652580.400793",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "netcat420/Qwen2.5-7b-nerd-uncensored-MFANN-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15644711587476784
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2920111436321769
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3791770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11003989361702128
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/2f89ceb3-8bc1-48f0-a4cb-3dc1b8acad87.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/2f89ceb3-8bc1-48f0-a4cb-3dc1b8acad87.json
deleted file mode 100644
index d7c09d8ddb9e9013c6e51aefd6070807a5ae285a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/2f89ceb3-8bc1-48f0-a4cb-3dc1b8acad87.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained/1762652580.4012349",
- "retrieved_timestamp": "1762652580.401236",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN-Slerp-Unretrained"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6486411610083467
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5065573474607916
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2990936555891239
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41520833333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3431682180851064
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/bbd39707-6062-461a-8e09-c8b8bc3451f7.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/bbd39707-6062-461a-8e09-c8b8bc3451f7.json
deleted file mode 100644
index 1bf9713163d77b44582f11f094b04e17fb079455..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/bbd39707-6062-461a-8e09-c8b8bc3451f7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN/1762652580.4010181",
- "retrieved_timestamp": "1762652580.4010189",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "netcat420/Qwen2.5-Coder-Scholar-7B-Abliterated-MFANN"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5742274941599401
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5071448530886461
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.256797583081571
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40584375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3156582446808511
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/9b2011ae-9d22-42be-a10b-6ce6e8ff1be4.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/9b2011ae-9d22-42be-a10b-6ce6e8ff1be4.json
deleted file mode 100644
index 041b176694f7321ddb2e3cfa9b9aee4ec1c42fe4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/9b2011ae-9d22-42be-a10b-6ce6e8ff1be4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b/1762652580.401459",
- "retrieved_timestamp": "1762652580.40146",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "netcat420/Qwen2.5-DeepSeek-R1-MFANN-Slerp-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2675556412540947
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37890218644722085
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01812688821752266
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23238255033557048
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35279166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16771941489361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-MFANN-7b/b6578885-9721-4349-ad55-5a80fd054c85.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-MFANN-7b/b6578885-9721-4349-ad55-5a80fd054c85.json
deleted file mode 100644
index 86c2feaea062deaa174faabc64c3f922dd5b36a1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_Qwen2.5-MFANN-7b/b6578885-9721-4349-ad55-5a80fd054c85.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/netcat420_Qwen2.5-MFANN-7b/1762652580.401672",
- "retrieved_timestamp": "1762652580.401673",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "netcat420/Qwen2.5-MFANN-7b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "netcat420/Qwen2.5-MFANN-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6097233119234742
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5054347004252888
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27870090634441086
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2860738255033557
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4020625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32330452127659576
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_qwen2.5-MFANN-7b-SLERP-V1.2/dfacdde9-fd5d-496f-8038-aa0439c0c991.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_qwen2.5-MFANN-7b-SLERP-V1.2/dfacdde9-fd5d-496f-8038-aa0439c0c991.json
deleted file mode 100644
index 65431ed088f11c05e2e98ed4a76766a817f124a0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_qwen2.5-MFANN-7b-SLERP-V1.2/dfacdde9-fd5d-496f-8038-aa0439c0c991.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/netcat420_qwen2.5-MFANN-7b-SLERP-V1.2/1762652580.40188",
- "retrieved_timestamp": "1762652580.40188",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "netcat420/qwen2.5-MFANN-7b-SLERP-V1.2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "netcat420/qwen2.5-MFANN-7b-SLERP-V1.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6606060807546199
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5111030308243185
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28700906344410876
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4259375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34383311170212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_qwen2.5-MFANN-7b-SLERPv1.1/0e66b7a6-bd6f-48f7-95e2-c117e0ea468f.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_qwen2.5-MFANN-7b-SLERPv1.1/0e66b7a6-bd6f-48f7-95e2-c117e0ea468f.json
deleted file mode 100644
index 1fe39bbdaff7bb1ffb154f47cef34f62267cf924..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_qwen2.5-MFANN-7b-SLERPv1.1/0e66b7a6-bd6f-48f7-95e2-c117e0ea468f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/netcat420_qwen2.5-MFANN-7b-SLERPv1.1/1762652580.402082",
- "retrieved_timestamp": "1762652580.4020832",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "netcat420/qwen2.5-MFANN-7b-SLERPv1.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "netcat420/qwen2.5-MFANN-7b-SLERPv1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6554852236510238
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5074761993537673
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29682779456193353
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41263541666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34483045212765956
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_qwen2.5-MFANN-7b-v1.1/845f96b7-62dc-4ebc-aa62-fcc6263e437f.json b/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_qwen2.5-MFANN-7b-v1.1/845f96b7-62dc-4ebc-aa62-fcc6263e437f.json
deleted file mode 100644
index b67c552126ee2c45e4d0666ee3fbb7e0fd40277c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/netcat420_qwen2.5-MFANN-7b-v1.1/845f96b7-62dc-4ebc-aa62-fcc6263e437f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/netcat420_qwen2.5-MFANN-7b-v1.1/1762652580.402283",
- "retrieved_timestamp": "1762652580.4022841",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "netcat420/qwen2.5-MFANN-7b-v1.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "netcat420/qwen2.5-MFANN-7b-v1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6088489651901399
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49666375554657477
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2824773413897281
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41139583333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3248005319148936
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v0.3-Qwen2.5-7B/0bc5145c-90d0-4a8b-89c6-0b03aa9d0ee1.json b/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v0.3-Qwen2.5-7B/0bc5145c-90d0-4a8b-89c6-0b03aa9d0ee1.json
deleted file mode 100644
index d727387b977885ca419683e5316700b2e02dc922..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v0.3-Qwen2.5-7B/0bc5145c-90d0-4a8b-89c6-0b03aa9d0ee1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/newsbang_Homer-v0.3-Qwen2.5-7B/1762652580.4035761",
- "retrieved_timestamp": "1762652580.403577",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "newsbang/Homer-v0.3-Qwen2.5-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "newsbang/Homer-v0.3-Qwen2.5-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5154013572875525
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5480594290467807
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30891238670694865
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3338926174496644
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47436458333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.445561835106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v0.4-Qwen2.5-7B/9a022bdc-d1b8-4f2e-a1af-6cd3bad6bded.json b/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v0.4-Qwen2.5-7B/9a022bdc-d1b8-4f2e-a1af-6cd3bad6bded.json
deleted file mode 100644
index a61aa977ee71b564bcf0d8932cc9864969b62808..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v0.4-Qwen2.5-7B/9a022bdc-d1b8-4f2e-a1af-6cd3bad6bded.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/newsbang_Homer-v0.4-Qwen2.5-7B/1762652580.403887",
- "retrieved_timestamp": "1762652580.4038882",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "newsbang/Homer-v0.4-Qwen2.5-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "newsbang/Homer-v0.4-Qwen2.5-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.799940823681166
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5533099174800821
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27794561933534745
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31543624161073824
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4310833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4362533244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v0.5-Qwen2.5-7B/dc22ad83-0752-4f5e-97ac-733ef6c6cf53.json b/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v0.5-Qwen2.5-7B/dc22ad83-0752-4f5e-97ac-733ef6c6cf53.json
deleted file mode 100644
index 574b390dca820bf400006148e8800383450013bf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v0.5-Qwen2.5-7B/dc22ad83-0752-4f5e-97ac-733ef6c6cf53.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/newsbang_Homer-v0.5-Qwen2.5-7B/1762652580.404095",
- "retrieved_timestamp": "1762652580.404096",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "newsbang/Homer-v0.5-Qwen2.5-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "newsbang/Homer-v0.5-Qwen2.5-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7880756393037142
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5540181073562815
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3723564954682779
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41930208333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4369182180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v1.0-Qwen2.5-72B/3ebdda73-1c41-4a98-b3cf-ac5d482c8b5c.json b/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v1.0-Qwen2.5-72B/3ebdda73-1c41-4a98-b3cf-ac5d482c8b5c.json
deleted file mode 100644
index b5e681cdd93e808cda6d95040eb3f538e0cc4da5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v1.0-Qwen2.5-72B/3ebdda73-1c41-4a98-b3cf-ac5d482c8b5c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/newsbang_Homer-v1.0-Qwen2.5-72B/1762652580.404309",
- "retrieved_timestamp": "1762652580.40431",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "newsbang/Homer-v1.0-Qwen2.5-72B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "newsbang/Homer-v1.0-Qwen2.5-72B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7627716680629618
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7309799550978827
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4901812688821752
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4161073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4677291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6145279255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v1.0-Qwen2.5-7B/1fe21571-0375-43c3-8071-1aaaf0223baa.json b/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v1.0-Qwen2.5-7B/1fe21571-0375-43c3-8071-1aaaf0223baa.json
deleted file mode 100644
index 32a45823d91a39aa47170be3bcfe386e53757b9f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/newsbang_Homer-v1.0-Qwen2.5-7B/1fe21571-0375-43c3-8071-1aaaf0223baa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/newsbang_Homer-v1.0-Qwen2.5-7B/1762652580.404567",
- "retrieved_timestamp": "1762652580.404568",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "newsbang/Homer-v1.0-Qwen2.5-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "newsbang/Homer-v1.0-Qwen2.5-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6392737935344885
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5655254177370223
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3323262839879154
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3221476510067114
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42782291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45345744680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nguyentd_FinancialAdvice-Qwen2.5-7B/0ced7574-bfc4-4958-a6f5-0944f9ac411a.json b/leaderboard_data/HFOpenLLMv2/alibaba/nguyentd_FinancialAdvice-Qwen2.5-7B/0ced7574-bfc4-4958-a6f5-0944f9ac411a.json
deleted file mode 100644
index ed71c4d43ac21157c98692f0d7a9583f7ca401a3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/nguyentd_FinancialAdvice-Qwen2.5-7B/0ced7574-bfc4-4958-a6f5-0944f9ac411a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/nguyentd_FinancialAdvice-Qwen2.5-7B/1762652580.404779",
- "retrieved_timestamp": "1762652580.4047801",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "nguyentd/FinancialAdvice-Qwen2.5-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "nguyentd/FinancialAdvice-Qwen2.5-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.449605934476079
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4730934153895792
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1148036253776435
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40248958333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.375249335106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nisten_franqwenstein-35b/3e3344d2-6911-4d5f-85d6-6593cbed3b49.json b/leaderboard_data/HFOpenLLMv2/alibaba/nisten_franqwenstein-35b/3e3344d2-6911-4d5f-85d6-6593cbed3b49.json
deleted file mode 100644
index a9a9c6c96692eb991d9ae98cd56f6ccc98d599c6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/nisten_franqwenstein-35b/3e3344d2-6911-4d5f-85d6-6593cbed3b49.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/nisten_franqwenstein-35b/1762652580.407119",
- "retrieved_timestamp": "1762652580.40712",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "nisten/franqwenstein-35b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "nisten/franqwenstein-35b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39135383005979685
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6591132598701116
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.304380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35906040268456374
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4681041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5610871010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 34.714
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nisten_franqwenstein-35b/ff90ed4a-6dcf-4b9b-9d3a-19f933e2c0c8.json b/leaderboard_data/HFOpenLLMv2/alibaba/nisten_franqwenstein-35b/ff90ed4a-6dcf-4b9b-9d3a-19f933e2c0c8.json
deleted file mode 100644
index 7fe6c03cc07dd2d2731e8639745a5d82f63df8f0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/nisten_franqwenstein-35b/ff90ed4a-6dcf-4b9b-9d3a-19f933e2c0c8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/nisten_franqwenstein-35b/1762652580.406877",
- "retrieved_timestamp": "1762652580.406878",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "nisten/franqwenstein-35b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "nisten/franqwenstein-35b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37986320740080765
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6646579178049268
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3406344410876133
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4035234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49402083333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5730551861702128
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 34.714
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/nisten_tqwendo-36b/3a5b1794-12f1-4004-bdb2-309cc950c757.json b/leaderboard_data/HFOpenLLMv2/alibaba/nisten_tqwendo-36b/3a5b1794-12f1-4004-bdb2-309cc950c757.json
deleted file mode 100644
index 96a164ceef965dbce4791a19e016301b8c10d372..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/nisten_tqwendo-36b/3a5b1794-12f1-4004-bdb2-309cc950c757.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/nisten_tqwendo-36b/1762652580.40731",
- "retrieved_timestamp": "1762652580.4073112",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "nisten/tqwendo-36b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "nisten/tqwendo-36b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6777672132164878
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6431830832659088
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41540785498489424
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44295833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4380817819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 35.69
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Galactic-Qwen-14B-Exp1/26aea3e6-571c-4751-8b0f-40a86a144973.json b/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Galactic-Qwen-14B-Exp1/26aea3e6-571c-4751-8b0f-40a86a144973.json
deleted file mode 100644
index e2a00f3ad6d0bd74b87198e80234d246ae171598..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Galactic-Qwen-14B-Exp1/26aea3e6-571c-4751-8b0f-40a86a144973.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/prithivMLmods_Galactic-Qwen-14B-Exp1/1762652580.463281",
- "retrieved_timestamp": "1762652580.463281",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "prithivMLmods/Galactic-Qwen-14B-Exp1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "prithivMLmods/Galactic-Qwen-14B-Exp1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5832202999153357
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6582262489447345
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40181268882175225
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3934563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4780520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.539561170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Galactic-Qwen-14B-Exp2/2fcdb8f8-5ec6-494a-b690-fa96febdb02a.json b/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Galactic-Qwen-14B-Exp2/2fcdb8f8-5ec6-494a-b690-fa96febdb02a.json
deleted file mode 100644
index 35bc920aca6df53b21dbbf47ac70591029ce7f67..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Galactic-Qwen-14B-Exp2/2fcdb8f8-5ec6-494a-b690-fa96febdb02a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/prithivMLmods_Galactic-Qwen-14B-Exp2/1762652580.463546",
- "retrieved_timestamp": "1762652580.463547",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "prithivMLmods/Galactic-Qwen-14B-Exp2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "prithivMLmods/Galactic-Qwen-14B-Exp2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6620300801872365
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7203002699449659
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3474320241691843
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39932885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5353854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5690658244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Magellanic-Qwen-25B-R999/08bfcf7b-e051-4c64-b1ee-0044cfa166f0.json b/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Magellanic-Qwen-25B-R999/08bfcf7b-e051-4c64-b1ee-0044cfa166f0.json
deleted file mode 100644
index c3a6c36c63387ae73010d1d81f14481bd52814dd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Magellanic-Qwen-25B-R999/08bfcf7b-e051-4c64-b1ee-0044cfa166f0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/prithivMLmods_Magellanic-Qwen-25B-R999/1762652580.466958",
- "retrieved_timestamp": "1762652580.466959",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "prithivMLmods/Magellanic-Qwen-25B-R999",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "prithivMLmods/Magellanic-Qwen-25B-R999"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18727199386516663
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26075689808294905
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.005287009063444109
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25083892617449666
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3831145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1299867021276596
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 24.962
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Qwen-7B-Distill-Reasoner/7afe076b-7f6a-42c1-9c43-652ea3ca94a9.json b/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Qwen-7B-Distill-Reasoner/7afe076b-7f6a-42c1-9c43-652ea3ca94a9.json
deleted file mode 100644
index 477c87b6adf4bfd6d160db22b5d489a24ece7459..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Qwen-7B-Distill-Reasoner/7afe076b-7f6a-42c1-9c43-652ea3ca94a9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen-7B-Distill-Reasoner/1762652580.474049",
- "retrieved_timestamp": "1762652580.47405",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "prithivMLmods/Qwen-7B-Distill-Reasoner",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "prithivMLmods/Qwen-7B-Distill-Reasoner"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3395712265677292
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4409329229697952
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3950151057401813
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3271812080536913
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36596874999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2818317819148936
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Qwen2.5-14B-DeepSeek-R1-1M/eacd8987-9631-4199-97ef-2cdc41879e8b.json b/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Qwen2.5-14B-DeepSeek-R1-1M/eacd8987-9631-4199-97ef-2cdc41879e8b.json
deleted file mode 100644
index ae9a366cf26c4fc97f2b1d47f109ca9165401219..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Qwen2.5-14B-DeepSeek-R1-1M/eacd8987-9631-4199-97ef-2cdc41879e8b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen2.5-14B-DeepSeek-R1-1M/1762652580.474647",
- "retrieved_timestamp": "1762652580.474647",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "prithivMLmods/Qwen2.5-14B-DeepSeek-R1-1M"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4192808415005519
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5934849375153814
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5128398791540786
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33221476510067116
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4606041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48994348404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Qwen2.5-7B-DeepSeek-R1-1M/4edb337d-b56c-4009-9199-22223d4ff9f8.json b/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Qwen2.5-7B-DeepSeek-R1-1M/4edb337d-b56c-4009-9199-22223d4ff9f8.json
deleted file mode 100644
index 26092916bf186c6df506b4816c2962b688c3c282..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/prithivMLmods_Qwen2.5-7B-DeepSeek-R1-1M/4edb337d-b56c-4009-9199-22223d4ff9f8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/prithivMLmods_Qwen2.5-7B-DeepSeek-R1-1M/1762652580.474907",
- "retrieved_timestamp": "1762652580.4749079",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "prithivMLmods/Qwen2.5-7B-DeepSeek-R1-1M"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18612282078219125
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3125554204779005
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015105740181268883
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3416875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12009640957446809
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/qingy2024_Qwen2.5-4B/c332cc18-e556-4b23-a45d-df26c250faa2.json b/leaderboard_data/HFOpenLLMv2/alibaba/qingy2024_Qwen2.5-4B/c332cc18-e556-4b23-a45d-df26c250faa2.json
deleted file mode 100644
index 757becf25b37fe65b9f4c30032c6d82add809c5f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/qingy2024_Qwen2.5-4B/c332cc18-e556-4b23-a45d-df26c250faa2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/qingy2024_Qwen2.5-4B/1762652580.486805",
- "retrieved_timestamp": "1762652580.486807",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "qingy2024/Qwen2.5-4B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "qingy2024/Qwen2.5-4B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21584839337402537
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4269378314466817
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46103125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2524933510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 4.168
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-Coder-V2.5-Qwen-14b/4f7b356a-1484-458c-8bc1-2640e039ab70.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-Coder-V2.5-Qwen-14b/4f7b356a-1484-458c-8bc1-2640e039ab70.json
deleted file mode 100644
index ae24aa29dcc1fb1c249520f831271f363593dea5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-Coder-V2.5-Qwen-14b/4f7b356a-1484-458c-8bc1-2640e039ab70.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-Coder-V2.5-Qwen-14b/1762652580.496415",
- "retrieved_timestamp": "1762652580.496416",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "rombodawg/Rombos-Coder-V2.5-Qwen-14b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "rombodawg/Rombos-Coder-V2.5-Qwen-14b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7047445223119102
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6165135323666455
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3300604229607251
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3914583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3939494680851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-Coder-V2.5-Qwen-7b/ca077d1a-a122-4040-b7d9-924773ce67ca.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-Coder-V2.5-Qwen-7b/ca077d1a-a122-4040-b7d9-924773ce67ca.json
deleted file mode 100644
index 4a2c44996aba8ad6151dbf5c3f1302065b8091c0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-Coder-V2.5-Qwen-7b/ca077d1a-a122-4040-b7d9-924773ce67ca.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-Coder-V2.5-Qwen-7b/1762652580.4966788",
- "retrieved_timestamp": "1762652580.49668",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "rombodawg/Rombos-Coder-V2.5-Qwen-7b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "rombodawg/Rombos-Coder-V2.5-Qwen-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6210388436016436
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5077090028113894
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3338368580060423
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3979375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33976063829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-0.5b/796ed438-2be4-45e6-9de9-c98ddd51f3d4.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-0.5b/796ed438-2be4-45e6-9de9-c98ddd51f3d4.json
deleted file mode 100644
index 6800e6ecefbd8441545bf1dcef4009d7575e8c27..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-0.5b/796ed438-2be4-45e6-9de9-c98ddd51f3d4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-0.5b/1762652580.4969",
- "retrieved_timestamp": "1762652580.4969",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "rombodawg/Rombos-LLM-V2.5-Qwen-0.5b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "rombodawg/Rombos-LLM-V2.5-Qwen-0.5b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28466690603155187
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32936751831436256
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32358333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18658577127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-1.5b/51f579c0-b5b4-4e01-9c19-b68fb6a21210.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-1.5b/51f579c0-b5b4-4e01-9c19-b68fb6a21210.json
deleted file mode 100644
index 0a80ca51860a4a81f58c333f0984a052ed249a19..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-1.5b/51f579c0-b5b4-4e01-9c19-b68fb6a21210.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-1.5b/1762652580.497122",
- "retrieved_timestamp": "1762652580.497123",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "rombodawg/Rombos-LLM-V2.5-Qwen-1.5b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "rombodawg/Rombos-LLM-V2.5-Qwen-1.5b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3402461025634206
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4256703145864387
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08534743202416918
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28859060402684567
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4185520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2922207446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-14b/91ec838e-699a-4c68-aa42-a9f0b3b6b0c2.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-14b/91ec838e-699a-4c68-aa42-a9f0b3b6b0c2.json
deleted file mode 100644
index 5e4103fafce4361dedf5d0d34c155c4dd149e164..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-14b/91ec838e-699a-4c68-aa42-a9f0b3b6b0c2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-14b/1762652580.4975061",
- "retrieved_timestamp": "1762652580.497507",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "rombodawg/Rombos-LLM-V2.5-Qwen-14b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "rombodawg/Rombos-LLM-V2.5-Qwen-14b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5840447789642593
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6481086261669653
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4554380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3716442953020134
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4717291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5375664893617021
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-32b/07e926c9-d8bb-41da-b41e-8fddc9fb99d8.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-32b/07e926c9-d8bb-41da-b41e-8fddc9fb99d8.json
deleted file mode 100644
index 42247a7172be0ac286158541617bd67b4baff507..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-32b/07e926c9-d8bb-41da-b41e-8fddc9fb99d8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-32b/1762652580.497819",
- "retrieved_timestamp": "1762652580.49782",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "rombodawg/Rombos-LLM-V2.5-Qwen-32b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "rombodawg/Rombos-LLM-V2.5-Qwen-32b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6826631116548536
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7045537070859799
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4954682779456193
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39681208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5034166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5915890957446809
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-3b/976e132a-8352-43fd-abdf-0fc4a04e9429.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-3b/976e132a-8352-43fd-abdf-0fc4a04e9429.json
deleted file mode 100644
index d0af513b8282d284698b7eb4d2a39e39cd1b68e5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-3b/976e132a-8352-43fd-abdf-0fc4a04e9429.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-3b/1762652580.498058",
- "retrieved_timestamp": "1762652580.498058",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "rombodawg/Rombos-LLM-V2.5-Qwen-3b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "rombodawg/Rombos-LLM-V2.5-Qwen-3b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5342358276040905
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4808896246368473
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2794561933534743
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4041666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37608045212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-72b/1ae05e9f-d432-4e7f-a662-4b4a118333d9.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-72b/1ae05e9f-d432-4e7f-a662-4b4a118333d9.json
deleted file mode 100644
index 3b4ca47cfad4071ba18a888375dad68621aed8bc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-72b/1ae05e9f-d432-4e7f-a662-4b4a118333d9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-72b/1762652580.498325",
- "retrieved_timestamp": "1762652580.498326",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "rombodawg/Rombos-LLM-V2.5-Qwen-72b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "rombodawg/Rombos-LLM-V2.5-Qwen-72b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.715535889218385
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7229589065788488
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5422960725075529
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39848993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4599166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.593500664893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-7b/23ec1efe-a9a1-41cb-9695-4be0ceb3c199.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-7b/23ec1efe-a9a1-41cb-9695-4be0ceb3c199.json
deleted file mode 100644
index e8c91f5749777a17e17e7fdf5acf34a8b6192b43..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5-Qwen-7b/23ec1efe-a9a1-41cb-9695-4be0ceb3c199.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5-Qwen-7b/1762652580.498573",
- "retrieved_timestamp": "1762652580.498574",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "rombodawg/Rombos-LLM-V2.5-Qwen-7b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "rombodawg/Rombos-LLM-V2.5-Qwen-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6237117514860571
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5543885046903589
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3814199395770393
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179530201342282
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42909375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4468916223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/3f1ffcf0-10bb-46b2-ae30-3eb958e943a1.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/3f1ffcf0-10bb-46b2-ae30-3eb958e943a1.json
deleted file mode 100644
index c4848a828a4711d68bb6594d6b1eeaa1d5faf78d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/3f1ffcf0-10bb-46b2-ae30-3eb958e943a1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/1762652580.498805",
- "retrieved_timestamp": "1762652580.498805",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2595125378440316
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3884043024656656
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09138972809667675
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39911458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27194148936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/91240596-5842-4441-b976-01ed7545bd1f.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/91240596-5842-4441-b976-01ed7545bd1f.json
deleted file mode 100644
index 15a8604dbad29d7740a8712913b38051c9c6910d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/91240596-5842-4441-b976-01ed7545bd1f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.5.1-Qwen-3b/1762652580.499037",
- "retrieved_timestamp": "1762652580.499037",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "rombodawg/Rombos-LLM-V2.5.1-Qwen-3b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2566401592219755
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39000839740376536
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39911458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27410239361702127
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.6-Qwen-14b/5842364a-2721-4882-90f3-97eba7c3b93a.json b/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.6-Qwen-14b/5842364a-2721-4882-90f3-97eba7c3b93a.json
deleted file mode 100644
index 31b859d81dc231b44377a459213243a7682e1e32..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/rombodawg_Rombos-LLM-V2.6-Qwen-14b/5842364a-2721-4882-90f3-97eba7c3b93a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/rombodawg_Rombos-LLM-V2.6-Qwen-14b/1762652580.499588",
- "retrieved_timestamp": "1762652580.4995892",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "rombodawg/Rombos-LLM-V2.6-Qwen-14b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "rombodawg/Rombos-LLM-V2.6-Qwen-14b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8431550508207113
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6442096596344892
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5211480362537765
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3338926174496644
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4220625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49609375
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/securin_Securin-LLM-V2.5-Qwen-1.5B/cbd0163f-fbea-4f40-a26b-a0508ec02061.json b/leaderboard_data/HFOpenLLMv2/alibaba/securin_Securin-LLM-V2.5-Qwen-1.5B/cbd0163f-fbea-4f40-a26b-a0508ec02061.json
deleted file mode 100644
index cc3d9ae47c6587488a45966d3b1015855e7749c8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/securin_Securin-LLM-V2.5-Qwen-1.5B/cbd0163f-fbea-4f40-a26b-a0508ec02061.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/securin_Securin-LLM-V2.5-Qwen-1.5B/1762652580.510926",
- "retrieved_timestamp": "1762652580.5109272",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "securin/Securin-LLM-V2.5-Qwen-1.5B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "securin/Securin-LLM-V2.5-Qwen-1.5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1492030035860406
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3158416288115425
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.024924471299093656
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3606354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16148603723404256
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.543
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sethuiyer_Qwen2.5-7B-Anvita/f2571e64-be03-4482-b5b4-d120444b0586.json b/leaderboard_data/HFOpenLLMv2/alibaba/sethuiyer_Qwen2.5-7B-Anvita/f2571e64-be03-4482-b5b4-d120444b0586.json
deleted file mode 100644
index 48af2c9a36f4c74f74a3507ff21cb1851c0fcfdb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sethuiyer_Qwen2.5-7B-Anvita/f2571e64-be03-4482-b5b4-d120444b0586.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sethuiyer_Qwen2.5-7B-Anvita/1762652580.514066",
- "retrieved_timestamp": "1762652580.514067",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sethuiyer/Qwen2.5-7B-Anvita",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sethuiyer/Qwen2.5-7B-Anvita"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6480416406246536
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5465860266784314
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20166163141993956
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3271812080536913
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43365625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4165558510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/someon98_qwen-CoMa-0.5b/be4ee67a-59d7-4098-992e-5f75cd53cdbc.json b/leaderboard_data/HFOpenLLMv2/alibaba/someon98_qwen-CoMa-0.5b/be4ee67a-59d7-4098-992e-5f75cd53cdbc.json
deleted file mode 100644
index 54b109ff5e9014f6ce7047c5457885b7da5e04b8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/someon98_qwen-CoMa-0.5b/be4ee67a-59d7-4098-992e-5f75cd53cdbc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/someon98_qwen-CoMa-0.5b/1762652580.518077",
- "retrieved_timestamp": "1762652580.5180779",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "someon98/qwen-CoMa-0.5b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "someon98/qwen-CoMa-0.5b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22766371006706648
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29533439538939815
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.004531722054380665
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23993288590604026
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40457291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10987367021276596
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Lamarck-14B-v0.4-Qwenvergence/41393c10-c1e5-4ccd-bcb1-df5392cb8ec6.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Lamarck-14B-v0.4-Qwenvergence/41393c10-c1e5-4ccd-bcb1-df5392cb8ec6.json
deleted file mode 100644
index 9b119388ed727cea61b75b7411fa1980c7644064..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Lamarck-14B-v0.4-Qwenvergence/41393c10-c1e5-4ccd-bcb1-df5392cb8ec6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Lamarck-14B-v0.4-Qwenvergence/1762652580.5196202",
- "retrieved_timestamp": "1762652580.5196211",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Lamarck-14B-v0.4-Qwenvergence",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Lamarck-14B-v0.4-Qwenvergence"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4906470387460826
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6535142192324058
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33987915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3783557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4846875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5406416223404256
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen-14B-ProseStock-v4/e68bc90b-1274-4e28-b280-65e6ceba53f8.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen-14B-ProseStock-v4/e68bc90b-1274-4e28-b280-65e6ceba53f8.json
deleted file mode 100644
index 585967e2fe662ab8ade9483ea3e3ec944c1cc335..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen-14B-ProseStock-v4/e68bc90b-1274-4e28-b280-65e6ceba53f8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen-14B-ProseStock-v4/1762652580.522184",
- "retrieved_timestamp": "1762652580.5221848",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwen-14B-ProseStock-v4",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwen-14B-ProseStock-v4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4942186731206532
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6498268976192769
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3640483383685801
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3884228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49383333333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5386469414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen-2.5-14B-Virmarckeoso/dc7af75a-f45a-449a-b6ba-cc033d7de79f.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen-2.5-14B-Virmarckeoso/dc7af75a-f45a-449a-b6ba-cc033d7de79f.json
deleted file mode 100644
index ce8eec1e423bad4f36920f28f32e4530ad4810de..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen-2.5-14B-Virmarckeoso/dc7af75a-f45a-449a-b6ba-cc033d7de79f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen-2.5-14B-Virmarckeoso/1762652580.5224378",
- "retrieved_timestamp": "1762652580.522439",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwen-2.5-14B-Virmarckeoso",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwen-2.5-14B-Virmarckeoso"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4813295389566351
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6569729950776678
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3564954682779456
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37919463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4793541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5377327127659575
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v2/5242491e-deb4-41ae-8d70-5b0d8ffb7bc7.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v2/5242491e-deb4-41ae-8d70-5b0d8ffb7bc7.json
deleted file mode 100644
index d8056cc58b824ef117e6fc2b9b7cc1f071fb5b7b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v2/5242491e-deb4-41ae-8d70-5b0d8ffb7bc7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v2/1762652580.52286",
- "retrieved_timestamp": "1762652580.522861",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4505301488938239
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6550336897572636
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3580060422960725
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3825503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48189583333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5379820478723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-IF-Variant/9df5ab5a-16cf-478f-87f0-1b8717e1e330.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-IF-Variant/9df5ab5a-16cf-478f-87f0-1b8717e1e330.json
deleted file mode 100644
index f5ee338698853f6fccdb1441b0c89e9ac9b13a45..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-IF-Variant/9df5ab5a-16cf-478f-87f0-1b8717e1e330.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-IF-Variant/1762652580.523307",
- "retrieved_timestamp": "1762652580.523308",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-IF-Variant"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6412973133507981
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5520788965536542
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2545317220543807
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34731543624161076
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5319166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4588597074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-Prose01/dd84656a-3b61-4241-a2eb-a5f52ff58ed2.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-Prose01/dd84656a-3b61-4241-a2eb-a5f52ff58ed2.json
deleted file mode 100644
index 5eed745fd33713a0b84db1dc6345c8d9d3f60a4b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-Prose01/dd84656a-3b61-4241-a2eb-a5f52ff58ed2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-Prose01/1762652580.523516",
- "retrieved_timestamp": "1762652580.523516",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6872343160591674
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6358769213927613
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3995468277945619
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38674496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48071875000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5275099734042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-model_stock/ba7b8cb4-608a-4bf0-b107-51e721f88dee.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-model_stock/ba7b8cb4-608a-4bf0-b107-51e721f88dee.json
deleted file mode 100644
index 7140ca4c6d12feafee08bbb03283b4a7918bdc84..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-model_stock/ba7b8cb4-608a-4bf0-b107-51e721f88dee.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3-model_stock/1762652580.5237172",
- "retrieved_timestamp": "1762652580.5237179",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7161852772864887
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6420915332649074
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4244712990936556
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3800335570469799
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47811458333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5315824468085106
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3/9e453ef2-bae1-4a06-8778-d9c0dfae33e8.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3/9e453ef2-bae1-4a06-8778-d9c0dfae33e8.json
deleted file mode 100644
index 532b1aec036c7a59cfd88875d166aaecb80a3212..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3/9e453ef2-bae1-4a06-8778-d9c0dfae33e8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso-v3/1762652580.52309",
- "retrieved_timestamp": "1762652580.52309",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7256523801291683
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.641460062329604
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4003021148036254
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3800335570469799
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4806875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5343251329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso/b3b73406-3b25-4a23-9e13-53fafdd66552.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso/b3b73406-3b25-4a23-9e13-53fafdd66552.json
deleted file mode 100644
index 9fafa714074bd6255965e3ddd20b7bc233faa20a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-14B-Vimarckoso/b3b73406-3b25-4a23-9e13-53fafdd66552.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-14B-Vimarckoso/1762652580.522644",
- "retrieved_timestamp": "1762652580.522645",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwen2.5-14B-Vimarckoso"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45742407922091166
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6446348390056346
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.338368580060423
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3926174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4858645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5329122340425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Prose/dceb35c6-30bb-483c-aa62-8273b409311b.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Prose/dceb35c6-30bb-483c-aa62-8273b409311b.json
deleted file mode 100644
index 1e1a42d358b2e38978b1c15256e253f6791b2c44..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Prose/dceb35c6-30bb-483c-aa62-8273b409311b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Prose/1762652580.524123",
- "retrieved_timestamp": "1762652580.524123",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Prose"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5347101246913745
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5599089581177875
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2892749244712991
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45017708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4525432180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Reason/100a253a-3409-4145-8a9d-0bf821e3ce91.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Reason/100a253a-3409-4145-8a9d-0bf821e3ce91.json
deleted file mode 100644
index 4a291e1e6193c037520cb8586722369499cf4578..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Reason/100a253a-3409-4145-8a9d-0bf821e3ce91.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-7B-Gordion-v0.1-Reason/1762652580.5243258",
- "retrieved_timestamp": "1762652580.5243268",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1-Reason"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49172085621705963
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5498169530870823
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2620845921450151
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34060402684563756
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4434166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4306848404255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
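Every metric_config in these records carries min_score, max_score, and lower_is_better, so a generic normalizer needs no per-benchmark special cases. A sketch, assuming scores always fall inside the declared range:

def normalized(res: dict) -> float:
    # Rescale one evaluation_results entry into [0, 1] using its own
    # metric_config, inverting when lower_is_better is true. In the
    # records above all six benchmarks are higher-is-better on [0, 1],
    # so this is an identity there, but it keeps the code schema-driven.
    cfg = res["metric_config"]
    lo, hi = cfg["min_score"], cfg["max_score"]
    x = (res["score_details"]["score"] - lo) / (hi - lo)
    return 1.0 - x if cfg["lower_is_better"] else x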
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-7B-Gordion-v0.1/174b2a17-c4fa-4021-868b-9c23a99603c9.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-7B-Gordion-v0.1/174b2a17-c4fa-4021-868b-9c23a99603c9.json
deleted file mode 100644
index 79eadfa22c5e2633ec88085882722464581d0e38..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwen2.5-7B-Gordion-v0.1/174b2a17-c4fa-4021-868b-9c23a99603c9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwen2.5-7B-Gordion-v0.1/1762652580.5239239",
- "retrieved_timestamp": "1762652580.523925",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwen2.5-7B-Gordion-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.748183708116686
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5523808037550308
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29154078549848944
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40162499999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43001994680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentessential-14B-v1/3cce1e77-5dfc-44d2-b0c2-f7220d989e9d.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentessential-14B-v1/3cce1e77-5dfc-44d2-b0c2-f7220d989e9d.json
deleted file mode 100644
index 54c8eedfca1daf17f64ca0fdd9be7b6c2306bea7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentessential-14B-v1/3cce1e77-5dfc-44d2-b0c2-f7220d989e9d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentessential-14B-v1/1762652580.524672",
- "retrieved_timestamp": "1762652580.524674",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwentessential-14B-v1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwentessential-14B-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6279083941719084
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6545165968552056
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4070996978851964
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3875838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4872916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5381482712765957
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v013/8127e367-fbd2-475d-a4f0-b8895dec6741.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v013/8127e367-fbd2-475d-a4f0-b8895dec6741.json
deleted file mode 100644
index 284844c2bc834b14552cf2af8a54ebcea6edaaf4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v013/8127e367-fbd2-475d-a4f0-b8895dec6741.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v013/1762652580.5250719",
- "retrieved_timestamp": "1762652580.525074",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwentinuum-14B-v013",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwentinuum-14B-v013"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6711226213114536
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6086634082040333
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37084592145015105
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3573825503355705
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5154166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49908577127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v1/c68a024d-fa21-4584-bde5-42121e919af7.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v1/c68a024d-fa21-4584-bde5-42121e919af7.json
deleted file mode 100644
index a78e2e058f5a72f775f6b1f5956a9dc611c58a36..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v1/c68a024d-fa21-4584-bde5-42121e919af7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v1/1762652580.5253482",
- "retrieved_timestamp": "1762652580.5253491",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwentinuum-14B-v1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwentinuum-14B-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5031616111916382
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6572572845221036
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36027190332326287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3825503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4780520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5409740691489362
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v2/ce1feb87-4f78-4ff1-a548-b3409591166f.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v2/ce1feb87-4f78-4ff1-a548-b3409591166f.json
deleted file mode 100644
index 2f39447b7c968220890ef8ed802b5c976102a4ce..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v2/ce1feb87-4f78-4ff1-a548-b3409591166f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v2/1762652580.525585",
- "retrieved_timestamp": "1762652580.525586",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwentinuum-14B-v2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwentinuum-14B-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5378329499062487
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6555355668062347
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37537764350453173
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3884228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47141666666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5408909574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
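Records like the ones deleted above are naturally flattened to one table row per model before Parquet output. This is not the repository's actual conversion script, only a sketch of that flattening step, assuming pandas with the pyarrow engine available:

import json
import pandas as pd  # to_parquet() below requires pyarrow (or fastparquet)
from pathlib import Path

rows = []
for fp in Path("leaderboard_data/HFOpenLLMv2").rglob("*.json"):
    data = json.loads(fp.read_text())
    row = {
        "model_id": data["model_info"]["id"],
        "developer": data["model_info"]["developer"],
        "precision": data.get("additional_details", {}).get("precision"),
        "params_billions": data.get("additional_details", {}).get("params_billions"),
    }
    # One column per benchmark, keyed by evaluation_name.
    for res in data.get("evaluation_results", []):
        row[res["evaluation_name"]] = res["score_details"].get("score")
    rows.append(row)

pd.DataFrame(rows).to_parquet("hfopenllmv2.parquet", index=False)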
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v3/96b75db5-4e23-4179-bbf7-801f35d31af7.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v3/96b75db5-4e23-4179-bbf7-801f35d31af7.json
deleted file mode 100644
index 18ac34a8130fe9f2a8d77a4ecafcf4572b4d84a7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v3/96b75db5-4e23-4179-bbf7-801f35d31af7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v3/1762652580.525815",
- "retrieved_timestamp": "1762652580.525816",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwentinuum-14B-v3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwentinuum-14B-v3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6157683834448153
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6538645567116264
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35347432024169184
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3875838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48598958333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5413065159574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v5/16e0de9b-9717-4451-babc-8df8748c4efe.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v5/16e0de9b-9717-4451-babc-8df8748c4efe.json
deleted file mode 100644
index 774ae77f2cd4aad5373c447a133282c01f3c64de..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v5/16e0de9b-9717-4451-babc-8df8748c4efe.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v5/1762652580.5261161",
- "retrieved_timestamp": "1762652580.526117",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwentinuum-14B-v5",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwentinuum-14B-v5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.628557782240012
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.654985060704008
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34441087613293053
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3875838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4873854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5418051861702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v6-Prose/8eecc1a5-d42e-423c-9155-daf66a414361.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v6-Prose/8eecc1a5-d42e-423c-9155-daf66a414361.json
deleted file mode 100644
index 77fde0e804cd664ba199195a0b09e660691fe2b7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v6-Prose/8eecc1a5-d42e-423c-9155-daf66a414361.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v6-Prose/1762652580.52656",
- "retrieved_timestamp": "1762652580.526561",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwentinuum-14B-v6-Prose",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwentinuum-14B-v6-Prose"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5642860942299764
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6545112522796068
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37009063444108764
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3884228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4912604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5392287234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v6/93e0bcb6-be72-4e9c-adbc-c8fce3240b0d.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v6/93e0bcb6-be72-4e9c-adbc-c8fce3240b0d.json
deleted file mode 100644
index 1ac01b8c10462a47ba5e9a00240e2dd0c12ecd9c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v6/93e0bcb6-be72-4e9c-adbc-c8fce3240b0d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v6/1762652580.526352",
- "retrieved_timestamp": "1762652580.526353",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwentinuum-14B-v6",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwentinuum-14B-v6"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6304062110755019
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6544517420216159
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36027190332326287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38674496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48995833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5399767287234043
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v7/6aaa1633-f780-42d4-b43e-5a4d31cf7aae.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v7/6aaa1633-f780-42d4-b43e-5a4d31cf7aae.json
deleted file mode 100644
index 569ccae138329a26d9ddf8c885d0af543d058c81..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v7/6aaa1633-f780-42d4-b43e-5a4d31cf7aae.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v7/1762652580.526774",
- "retrieved_timestamp": "1762652580.526774",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwentinuum-14B-v7",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwentinuum-14B-v7"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6109223526908603
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6551430222697051
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35725075528700906
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39093959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48198958333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5409740691489362
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
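All records above report the same six benchmarks, so a quick comparison across the deleted models can use a plain mean. Note this is a naive unweighted average over raw scores, not the Open LLM Leaderboard's official normalized average:

from statistics import mean

BENCHMARKS = ["IFEval", "BBH", "MATH Level 5", "GPQA", "MUSR", "MMLU-PRO"]

def naive_average(scores: dict[str, float]) -> float:
    # Unweighted mean over the six benchmarks. A missing benchmark raises
    # KeyError on purpose, so incomplete records are surfaced rather than
    # silently skewing the average.
    return mean(scores[b] for b in BENCHMARKS)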
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v8/6be09829-08e5-4d45-a091-5451f6c74d51.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v8/6be09829-08e5-4d45-a091-5451f6c74d51.json
deleted file mode 100644
index c977a33245c753abdf4fb5c1ec7fea8983e85007..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v8/6be09829-08e5-4d45-a091-5451f6c74d51.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v8/1762652580.526987",
- "retrieved_timestamp": "1762652580.526987",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwentinuum-14B-v8",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwentinuum-14B-v8"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5411552458587658
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6534258495008117
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39123867069486407
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38338926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48732291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5412234042553191
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v9/cea3e14d-a43d-4e32-b8fc-d8ae995190d8.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v9/cea3e14d-a43d-4e32-b8fc-d8ae995190d8.json
deleted file mode 100644
index 26d3b46e0e720be409ccbd4c1fc28a7a3b6ed895..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwentinuum-14B-v9/cea3e14d-a43d-4e32-b8fc-d8ae995190d8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwentinuum-14B-v9/1762652580.5271978",
- "retrieved_timestamp": "1762652580.527199",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwentinuum-14B-v9",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwentinuum-14B-v9"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5107304175144174
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6580257842849174
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34818731117824775
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3859060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47811458333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5421376329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-qv256/f06fc349-e84e-4ec7-a9c9-8819896c2beb.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-qv256/f06fc349-e84e-4ec7-a9c9-8819896c2beb.json
deleted file mode 100644
index 7521b02e6952c99769399b42308bad855670fa56..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-qv256/f06fc349-e84e-4ec7-a9c9-8819896c2beb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-qv256/1762652580.52741",
- "retrieved_timestamp": "1762652580.527411",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwenvergence-14B-qv256",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwenvergence-14B-qv256"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7006232352380573
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6312084721949004
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38972809667673713
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3783557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49259375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5177859042553191
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v0.6-004-model_stock/86591e86-5bfb-4e8e-b910-bf6b5011562c.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v0.6-004-model_stock/86591e86-5bfb-4e8e-b910-bf6b5011562c.json
deleted file mode 100644
index e85b3e090aa8d62c8e432cfb09ab18cc25efafe7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v0.6-004-model_stock/86591e86-5bfb-4e8e-b910-bf6b5011562c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v0.6-004-model_stock/1762652580.5276191",
- "retrieved_timestamp": "1762652580.52762",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwenvergence-14B-v0.6-004-model_stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6859854076073706
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6249338707540049
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4093655589123867
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38338926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5033229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.519281914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
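Since each record embeds its own score bounds, a lightweight sanity check can run before any conversion or aggregation. A sketch, with field names taken from the records above:

def validate_record(data: dict) -> list[str]:
    # Collect problems found in one schema 0.0.1 record instead of
    # failing on the first, so a batch run can report everything at once.
    problems = []
    if data.get("schema_version") != "0.0.1":
        problems.append("unexpected schema_version")
    for res in data.get("evaluation_results", []):
        cfg = res["metric_config"]
        score = res["score_details"]["score"]
        if not (cfg["min_score"] <= score <= cfg["max_score"]):
            problems.append(f"{res['evaluation_name']}: score out of range")
    return problems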
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v10/f2b35397-f539-4129-8e1f-f9dae9c9431b.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v10/f2b35397-f539-4129-8e1f-f9dae9c9431b.json
deleted file mode 100644
index 4fac89f06cf5f50fde019d3a0254dd04a097daa1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v10/f2b35397-f539-4129-8e1f-f9dae9c9431b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v10/1762652580.5278451",
- "retrieved_timestamp": "1762652580.5278451",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwenvergence-14B-v10",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwenvergence-14B-v10"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6756938257157675
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6316425399409628
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4788519637462236
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37919463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49913541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.523936170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v11/50ae9dc0-efcc-43cb-8704-6dfb9270656a.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v11/50ae9dc0-efcc-43cb-8704-6dfb9270656a.json
deleted file mode 100644
index 2961ff5b9b295d1529cfefda0f93b0c29535bc7e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v11/50ae9dc0-efcc-43cb-8704-6dfb9270656a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v11/1762652580.528142",
- "retrieved_timestamp": "1762652580.5281432",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwenvergence-14B-v11",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwenvergence-14B-v11"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7192327468893647
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6367548394062034
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4645015105740181
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3724832214765101
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4754479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5327460106382979
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v12-Prose-DS/a6c5b80d-e685-405a-8444-1be1ed763d2e.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v12-Prose-DS/a6c5b80d-e685-405a-8444-1be1ed763d2e.json
deleted file mode 100644
index e0ff01d64cfc2fa0a7711c1fe7d40f844c37d619..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v12-Prose-DS/a6c5b80d-e685-405a-8444-1be1ed763d2e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v12-Prose-DS/1762652580.52859",
- "retrieved_timestamp": "1762652580.5285912",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwenvergence-14B-v12-Prose-DS",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwenvergence-14B-v12-Prose-DS"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6173419859306639
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6506726813719318
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43051359516616317
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39429530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5150729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5369015957446809
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v12-Prose/052e63b2-028b-4a4a-ae2b-51514e982239.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v12-Prose/052e63b2-028b-4a4a-ae2b-51514e982239.json
deleted file mode 100644
index 13ae2aea4ce5f52e83a6c82da67d24d98a9774ef..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v12-Prose/052e63b2-028b-4a4a-ae2b-51514e982239.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v12-Prose/1762652580.52837",
- "retrieved_timestamp": "1762652580.5283709",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwenvergence-14B-v12-Prose",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwenvergence-14B-v12-Prose"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5412051135431766
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6504247508173936
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35347432024169184
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38674496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49913541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5381482712765957
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v13-Prose-DS/f205507c-48ef-4a40-a0e8-39f5f7bf2cdb.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v13-Prose-DS/f205507c-48ef-4a40-a0e8-39f5f7bf2cdb.json
deleted file mode 100644
index e2bf3265f18b29c26c958aa52ac8c5f9dd3a9008..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v13-Prose-DS/f205507c-48ef-4a40-a0e8-39f5f7bf2cdb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v13-Prose-DS/1762652580.528805",
- "retrieved_timestamp": "1762652580.528806",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwenvergence-14B-v13-Prose-DS",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwenvergence-14B-v13-Prose-DS"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.717808747456748
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6405077084802886
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3859516616314199
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38338926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49265625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.534906914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v15-Prose-MS/a9434630-a7cd-4dc1-b542-e76402344166.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v15-Prose-MS/a9434630-a7cd-4dc1-b542-e76402344166.json
deleted file mode 100644
index 37511333111219c653f0b43410539d285a71d786..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v15-Prose-MS/a9434630-a7cd-4dc1-b542-e76402344166.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v15-Prose-MS/1762652580.529013",
- "retrieved_timestamp": "1762652580.529014",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwenvergence-14B-v15-Prose-MS",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwenvergence-14B-v15-Prose-MS"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5032114788760489
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6550130348108012
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3632930513595166
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3951342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4912916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.539311835106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v2-Prose/f639d7e3-ffb9-4dc5-ab20-993522afa5b4.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v2-Prose/f639d7e3-ffb9-4dc5-ab20-993522afa5b4.json
deleted file mode 100644
index 933c88b5d375b98488b44a9d1ac7ba10c9298848..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v2-Prose/f639d7e3-ffb9-4dc5-ab20-993522afa5b4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v2-Prose/1762652580.529223",
- "retrieved_timestamp": "1762652580.529224",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwenvergence-14B-v2-Prose",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwenvergence-14B-v2-Prose"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47048830436574957
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6518830473518972
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3557401812688822
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3934563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49259375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5371509308510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3-Prose/37c4d6b3-9964-45d3-a6ed-8b84229ed304.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3-Prose/37c4d6b3-9964-45d3-a6ed-8b84229ed304.json
deleted file mode 100644
index de26eea967683d9d6ac6043a6887dbb81ec8c45c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3-Prose/37c4d6b3-9964-45d3-a6ed-8b84229ed304.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3-Prose/1762652580.5297742",
- "retrieved_timestamp": "1762652580.5297751",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwenvergence-14B-v3-Prose",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwenvergence-14B-v3-Prose"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49177072390147036
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6512913170949324
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3648036253776435
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3951342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49389583333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5369847074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3-Reason/50c37538-a425-4b30-a9e0-9a60f6b2492f.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3-Reason/50c37538-a425-4b30-a9e0-9a60f6b2492f.json
deleted file mode 100644
index 205eb17aaa24420106abdaf9710b7a75e0108055..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3-Reason/50c37538-a425-4b30-a9e0-9a60f6b2492f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3-Reason/1762652580.530208",
- "retrieved_timestamp": "1762652580.530208",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwenvergence-14B-v3-Reason",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwenvergence-14B-v3-Reason"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5278161943642867
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6557437566824342
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3119335347432024
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38422818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47541666666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5396442819148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3-Reason/58ac7b57-e498-4de0-95aa-475c9c56aaf6.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3-Reason/58ac7b57-e498-4de0-95aa-475c9c56aaf6.json
deleted file mode 100644
index 89fb71c5be753ab5a728057fe6c63317f5c08d34..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3-Reason/58ac7b57-e498-4de0-95aa-475c9c56aaf6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3-Reason/1762652580.530001",
- "retrieved_timestamp": "1762652580.530001",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwenvergence-14B-v3-Reason",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwenvergence-14B-v3-Reason"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5366837768232734
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6561283957466177
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3580060422960725
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38674496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47402083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5394780585106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3/6cefa467-dae0-4b8b-bd5c-3343f1bfe111.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3/6cefa467-dae0-4b8b-bd5c-3343f1bfe111.json
deleted file mode 100644
index 8f91d2249c43f62e98aa682072ef0cbae3f6d8ea..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v3/6cefa467-dae0-4b8b-bd5c-3343f1bfe111.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v3/1762652580.529505",
- "retrieved_timestamp": "1762652580.529512",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwenvergence-14B-v3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwenvergence-14B-v3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.504410519643435
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.654823836148701
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3693353474320242
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38422818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48859375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5386469414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v6-Prose-model_stock/7f57b41f-d8e8-46a0-ad1f-2638e287bce7.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v6-Prose-model_stock/7f57b41f-d8e8-46a0-ad1f-2638e287bce7.json
deleted file mode 100644
index 4ef51a35bb47dd5ef0d1983a72698f99dc88900c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v6-Prose-model_stock/7f57b41f-d8e8-46a0-ad1f-2638e287bce7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v6-Prose-model_stock/1762652580.530609",
- "retrieved_timestamp": "1762652580.5306098",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwenvergence-14B-v6-Prose-model_stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48110458029140457
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6530441861690175
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36027190332326287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3934563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48989583333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5387300531914894
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v6-Prose/fa88bc37-eb6b-4d69-8983-7a489ab09665.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v6-Prose/fa88bc37-eb6b-4d69-8983-7a489ab09665.json
deleted file mode 100644
index 93e2861fbb371525ade40beb685af124b5fa4019..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v6-Prose/fa88bc37-eb6b-4d69-8983-7a489ab09665.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v6-Prose/1762652580.530398",
- "retrieved_timestamp": "1762652580.530399",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwenvergence-14B-v6-Prose",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwenvergence-14B-v6-Prose"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5990073006289978
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6543750230807198
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3564954682779456
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3884228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48865625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5370678191489362
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v8/9332e745-f594-40a9-af22-98709efc179d.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v8/9332e745-f594-40a9-af22-98709efc179d.json
deleted file mode 100644
index 777ff7dce5453d29e3da608467010102d97274ad..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v8/9332e745-f594-40a9-af22-98709efc179d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v8/1762652580.530813",
- "retrieved_timestamp": "1762652580.530813",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwenvergence-14B-v8",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwenvergence-14B-v8"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5913387589373973
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6522455361956444
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40483383685800606
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47678125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.543467420212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v9/65c35557-ec37-49c3-b7f6-11ce837500f0.json b/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v9/65c35557-ec37-49c3-b7f6-11ce837500f0.json
deleted file mode 100644
index 6d6ac2297ab0663a3d7b94c10147010ccf276076..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sometimesanotion_Qwenvergence-14B-v9/65c35557-ec37-49c3-b7f6-11ce837500f0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sometimesanotion_Qwenvergence-14B-v9/1762652580.531015",
- "retrieved_timestamp": "1762652580.5310159",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sometimesanotion/Qwenvergence-14B-v9",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sometimesanotion/Qwenvergence-14B-v9"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6598070896332842
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6165623747365094
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41389728096676737
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36828859060402686
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5141145833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5110538563829787
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_Qwenftmodel/aece90fe-f0eb-4c34-afd0-7a4fc36dc385.json b/leaderboard_data/HFOpenLLMv2/alibaba/sumink_Qwenftmodel/aece90fe-f0eb-4c34-afd0-7a4fc36dc385.json
deleted file mode 100644
index 257279a6c185454baf3ef5d83990580d30b477dc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_Qwenftmodel/aece90fe-f0eb-4c34-afd0-7a4fc36dc385.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sumink_Qwenftmodel/1762652580.5454028",
- "retrieved_timestamp": "1762652580.545404",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sumink/Qwenftmodel",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sumink/Qwenftmodel"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17290899258412123
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38226970256668574
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25671140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36171875000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23387632978723405
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_Qwenmplus/fc41cf78-6547-4fe6-83aa-ef5edd99a392.json b/leaderboard_data/HFOpenLLMv2/alibaba/sumink_Qwenmplus/fc41cf78-6547-4fe6-83aa-ef5edd99a392.json
deleted file mode 100644
index d9b4c88e761be8c1b23d53c53d4efd560f548ca5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_Qwenmplus/fc41cf78-6547-4fe6-83aa-ef5edd99a392.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sumink_Qwenmplus/1762652580.5456882",
- "retrieved_timestamp": "1762652580.545689",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sumink/Qwenmplus",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sumink/Qwenmplus"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20403307668098425
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3675511408391697
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.024924471299093656
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38283333333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19921875
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.543
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_Qwensci/57a9ff0c-795f-45c4-b0c7-ad0c7400c88d.json b/leaderboard_data/HFOpenLLMv2/alibaba/sumink_Qwensci/57a9ff0c-795f-45c4-b0c7-ad0c7400c88d.json
deleted file mode 100644
index 2327f4066088579f83a035720be75797d08dda3a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_Qwensci/57a9ff0c-795f-45c4-b0c7-ad0c7400c88d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sumink_Qwensci/1762652580.545888",
- "retrieved_timestamp": "1762652580.5458891",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sumink/Qwensci",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sumink/Qwensci"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17398281005509825
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3281870591856875
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02039274924471299
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3608854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12599734042553193
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.543
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen/7c73720a-03d8-4d90-9557-cd579c7c3e86.json b/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen/7c73720a-03d8-4d90-9557-cd579c7c3e86.json
deleted file mode 100644
index 7f96fc0af2bb90cc4c43d4b1d5f0b52997cc37a1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen/7c73720a-03d8-4d90-9557-cd579c7c3e86.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sumink_bbhqwen/1762652580.546088",
- "retrieved_timestamp": "1762652580.546089",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sumink/bbhqwen",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sumink/bbhqwen"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18085236062536292
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3388245916050106
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.010574018126888218
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43523958333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16165226063829788
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen2/b4dbcb3f-11dd-4bce-9d45-869ae7c8f9b1.json b/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen2/b4dbcb3f-11dd-4bce-9d45-869ae7c8f9b1.json
deleted file mode 100644
index f2b27274d2bc49666d521b0aad0e7ebd62a5ad77..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen2/b4dbcb3f-11dd-4bce-9d45-869ae7c8f9b1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sumink_bbhqwen2/1762652580.546288",
- "retrieved_timestamp": "1762652580.546289",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sumink/bbhqwen2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sumink/bbhqwen2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15329991090307052
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30663248168563745
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.006042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44305208333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1149434840425532
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen3/b9dae1c0-8088-4ffb-9e91-0f6579b3147e.json b/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen3/b9dae1c0-8088-4ffb-9e91-0f6579b3147e.json
deleted file mode 100644
index 16e47c18d7ead9777b047cc66686de6cf4a769fa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen3/b9dae1c0-8088-4ffb-9e91-0f6579b3147e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sumink_bbhqwen3/1762652580.546491",
- "retrieved_timestamp": "1762652580.546491",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sumink/bbhqwen3",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sumink/bbhqwen3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1942911474886634
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2950842029929075
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3796145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11660571808510638
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen4/336dbfac-133a-46c8-87c9-40f1ad12a714.json b/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen4/336dbfac-133a-46c8-87c9-40f1ad12a714.json
deleted file mode 100644
index 5745c3da97512b0c39539b648353f38fcb47f5aa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen4/336dbfac-133a-46c8-87c9-40f1ad12a714.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sumink_bbhqwen4/1762652580.546697",
- "retrieved_timestamp": "1762652580.546698",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sumink/bbhqwen4",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sumink/bbhqwen4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14485675784695717
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3199395559502713
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.006042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24412751677852348
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4028958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15093085106382978
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen5/4b528bc8-e94a-4437-8c1c-bcd823bf5f45.json b/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen5/4b528bc8-e94a-4437-8c1c-bcd823bf5f45.json
deleted file mode 100644
index 91f789e2b21587697af42d5e03a06cf7c250385d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen5/4b528bc8-e94a-4437-8c1c-bcd823bf5f45.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sumink_bbhqwen5/1762652580.546902",
- "retrieved_timestamp": "1762652580.5469031",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sumink/bbhqwen5",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sumink/bbhqwen5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1521507378200951
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29130964476405813
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0022658610271903325
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4019375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11311502659574468
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen6/f585e5fe-c3b5-4134-97ed-67b57d74adb8.json b/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen6/f585e5fe-c3b5-4134-97ed-67b57d74adb8.json
deleted file mode 100644
index 56fe07f2459eabce439249902ec4ad8fcb25083f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/sumink_bbhqwen6/f585e5fe-c3b5-4134-97ed-67b57d74adb8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sumink_bbhqwen6/1762652580.547101",
- "retrieved_timestamp": "1762652580.547102",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sumink/bbhqwen6",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "sumink/bbhqwen6"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18929551368147626
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2782242419852629
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0007552870090634441
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35796875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11527593085106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/synergetic_FrankenQwen2.5-14B/5f69b85b-d66c-400b-8d40-58b96233ec3c.json b/leaderboard_data/HFOpenLLMv2/alibaba/synergetic_FrankenQwen2.5-14B/5f69b85b-d66c-400b-8d40-58b96233ec3c.json
deleted file mode 100644
index 80f877e727d5e4db1c5931960d7f515209baefca..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/synergetic_FrankenQwen2.5-14B/5f69b85b-d66c-400b-8d40-58b96233ec3c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/synergetic_FrankenQwen2.5-14B/1762652580.5505831",
- "retrieved_timestamp": "1762652580.550584",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "synergetic/FrankenQwen2.5-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "synergetic/FrankenQwen2.5-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1869472998311148
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6047748435655343
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3842604166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43816489361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 16.972
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-14b-tensopolis-v1/a3ff3d30-5dec-4ec3-87b9-004d570b005a.json b/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-14b-tensopolis-v1/a3ff3d30-5dec-4ec3-87b9-004d570b005a.json
deleted file mode 100644
index 65453582a854add385c6912bb159aa01d0062ccb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-14b-tensopolis-v1/a3ff3d30-5dec-4ec3-87b9-004d570b005a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-14b-tensopolis-v1/1762652580.556658",
- "retrieved_timestamp": "1762652580.556659",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "tensopolis/qwen2.5-14b-tensopolis-v1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "tensopolis/qwen2.5-14b-tensopolis-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7990166092634211
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6363595324538928
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5294561933534743
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3347315436241611
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41933333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49110704787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-3b-or1-tensopolis/b79e1f6d-698d-4bde-b35f-3f31e09c9d6a.json b/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-3b-or1-tensopolis/b79e1f6d-698d-4bde-b35f-3f31e09c9d6a.json
deleted file mode 100644
index 44721a160c3716893bb047a0d4127c77c6d1cc8c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-3b-or1-tensopolis/b79e1f6d-698d-4bde-b35f-3f31e09c9d6a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-3b-or1-tensopolis/1762652580.556941",
- "retrieved_timestamp": "1762652580.556942",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "tensopolis/qwen2.5-3b-or1-tensopolis",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "tensopolis/qwen2.5-3b-or1-tensopolis"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35400958346077294
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44214988544006467
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1729607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37492708333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3197307180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-7b-tensopolis-v1/20854e9f-ba11-492c-8d81-08e13ca1ec35.json b/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-7b-tensopolis-v1/20854e9f-ba11-492c-8d81-08e13ca1ec35.json
deleted file mode 100644
index 0c2625ca5f84dad20dd4d25980ea9c1b0f85c307..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-7b-tensopolis-v1/20854e9f-ba11-492c-8d81-08e13ca1ec35.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-7b-tensopolis-v1/1762652580.5571609",
- "retrieved_timestamp": "1762652580.557162",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "tensopolis/qwen2.5-7b-tensopolis-v1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "tensopolis/qwen2.5-7b-tensopolis-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7660939640154789
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5378740884658956
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4561933534743202
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.433875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42686170212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-7b-tensopolis-v2/e7862d19-b3d4-47f6-b174-b53015229a42.json b/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-7b-tensopolis-v2/e7862d19-b3d4-47f6-b174-b53015229a42.json
deleted file mode 100644
index aa6fb06cf6e0b27d8fb91476e77d062103f5f3e8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/tensopolis_qwen2.5-7b-tensopolis-v2/e7862d19-b3d4-47f6-b174-b53015229a42.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/tensopolis_qwen2.5-7b-tensopolis-v2/1762652580.5574138",
- "retrieved_timestamp": "1762652580.5574138",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "tensopolis/qwen2.5-7b-tensopolis-v2",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "tensopolis/qwen2.5-7b-tensopolis-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.752105524452896
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5414622323974015
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4818731117824773
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42463541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42428523936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/theprint_ReWiz-Qwen-2.5-14B/9a4e6a55-e39e-4da6-b4bb-670cbd75d5c6.json b/leaderboard_data/HFOpenLLMv2/alibaba/theprint_ReWiz-Qwen-2.5-14B/9a4e6a55-e39e-4da6-b4bb-670cbd75d5c6.json
deleted file mode 100644
index 36a763cdb4ef94100b0b7152a42ff50814ccb9ba..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/theprint_ReWiz-Qwen-2.5-14B/9a4e6a55-e39e-4da6-b4bb-670cbd75d5c6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/theprint_ReWiz-Qwen-2.5-14B/1762652580.563489",
- "retrieved_timestamp": "1762652580.5634902",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "theprint/ReWiz-Qwen-2.5-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "theprint/ReWiz-Qwen-2.5-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27854647889821227
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6179492756426455
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29229607250755285
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3800335570469799
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45389583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5092253989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "?",
- "params_billions": 16.743
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2-1_5B/626a924c-618b-4047-bed3-9ff67b6e47ae.json b/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2-1_5B/626a924c-618b-4047-bed3-9ff67b6e47ae.json
deleted file mode 100644
index 0f910635fe4ee274c1f0a47760dbb8007f92ce72..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2-1_5B/626a924c-618b-4047-bed3-9ff67b6e47ae.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2-1_5B/1762652580.565519",
- "retrieved_timestamp": "1762652580.565519",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "thomas-yanxin/XinYuan-Qwen2-1_5B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "thomas-yanxin/XinYuan-Qwen2-1_5B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2985556102253133
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3635491993150823
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06722054380664652
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36339583333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23570478723404256
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2-7B-0917/0fac57c3-7bea-48fc-bb38-b679ab835d91.json b/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2-7B-0917/0fac57c3-7bea-48fc-bb38-b679ab835d91.json
deleted file mode 100644
index d52ef15b5fd9c6aaac6e28d30a40a33dc379b4f0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2-7B-0917/0fac57c3-7bea-48fc-bb38-b679ab835d91.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2-7B-0917/1762652580.56599",
- "retrieved_timestamp": "1762652580.565991",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "thomas-yanxin/XinYuan-Qwen2-7B-0917",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "thomas-yanxin/XinYuan-Qwen2-7B-0917"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37191983935956596
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5169215573786009
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19788519637462235
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4401041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4245345744680851
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2-7B/5e0690cd-21e6-4778-8af9-7d9f623f5f52.json b/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2-7B/5e0690cd-21e6-4778-8af9-7d9f623f5f52.json
deleted file mode 100644
index 6cc03251a26db181f7efeee45c3ef48c8d9c6974..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2-7B/5e0690cd-21e6-4778-8af9-7d9f623f5f52.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2-7B/1762652580.565779",
- "retrieved_timestamp": "1762652580.56578",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "thomas-yanxin/XinYuan-Qwen2-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "thomas-yanxin/XinYuan-Qwen2-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44376033369238066
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4936629157238895
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14577039274924472
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40581249999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3924534574468085
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2.5-7B-0917/6dc1a4e7-6ce6-4337-a242-420fe4139538.json b/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2.5-7B-0917/6dc1a4e7-6ce6-4337-a242-420fe4139538.json
deleted file mode 100644
index 7e83a64a6b8ed41138b1d401c92887de5532c592..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/thomas-yanxin_XinYuan-Qwen2.5-7B-0917/6dc1a4e7-6ce6-4337-a242-420fe4139538.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/thomas-yanxin_XinYuan-Qwen2.5-7B-0917/1762652580.5662022",
- "retrieved_timestamp": "1762652580.5662029",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "thomas-yanxin/XinYuan-Qwen2.5-7B-0917",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "thomas-yanxin/XinYuan-Qwen2.5-7B-0917"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35770644113175265
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5184106116987492
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1933534743202417
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3675520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38821476063829785
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/v000000_Qwen2.5-14B-Gutenberg-1e-Delta/676745af-1929-4875-9a78-d57354883d75.json b/leaderboard_data/HFOpenLLMv2/alibaba/v000000_Qwen2.5-14B-Gutenberg-1e-Delta/676745af-1929-4875-9a78-d57354883d75.json
deleted file mode 100644
index 904f2bd43c0b2c5a2f1e6ba9dccd1370f6fbd163..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/v000000_Qwen2.5-14B-Gutenberg-1e-Delta/676745af-1929-4875-9a78-d57354883d75.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/v000000_Qwen2.5-14B-Gutenberg-1e-Delta/1762652580.584905",
- "retrieved_timestamp": "1762652580.584906",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "v000000/Qwen2.5-14B-Gutenberg-1e-Delta",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "v000000/Qwen2.5-14B-Gutenberg-1e-Delta"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8045120280854798
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.639849930188539
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5264350453172205
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3288590604026846
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40730208333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4930186170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/v000000_Qwen2.5-Lumen-14B/7b134cb3-7794-4984-9240-b889e2a3b6b4.json b/leaderboard_data/HFOpenLLMv2/alibaba/v000000_Qwen2.5-Lumen-14B/7b134cb3-7794-4984-9240-b889e2a3b6b4.json
deleted file mode 100644
index 36d14a54efe3fec3736754f6320b80476cbbd57d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/v000000_Qwen2.5-Lumen-14B/7b134cb3-7794-4984-9240-b889e2a3b6b4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/v000000_Qwen2.5-Lumen-14B/1762652580.585356",
- "retrieved_timestamp": "1762652580.585357",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "v000000/Qwen2.5-Lumen-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "v000000/Qwen2.5-Lumen-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8063604569209697
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6390809511149668
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5362537764350453
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41139583333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49027593085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/vonjack_Qwen2.5-Coder-0.5B-Merged/76b52fe1-c232-47d9-8052-077a945364cd.json b/leaderboard_data/HFOpenLLMv2/alibaba/vonjack_Qwen2.5-Coder-0.5B-Merged/76b52fe1-c232-47d9-8052-077a945364cd.json
deleted file mode 100644
index bdc68cf09663160eee3921641c80bb65db3101fa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/vonjack_Qwen2.5-Coder-0.5B-Merged/76b52fe1-c232-47d9-8052-077a945364cd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/vonjack_Qwen2.5-Coder-0.5B-Merged/1762652580.5902011",
- "retrieved_timestamp": "1762652580.590202",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "vonjack/Qwen2.5-Coder-0.5B-Merged",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "vonjack/Qwen2.5-Coder-0.5B-Merged"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30997087727230416
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3076017752057237
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0377643504531722
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33034375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12017952127659574
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.63
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/wave-on-discord_qwent-7b/1dc524b8-18d6-4bc0-9146-713ef8abd983.json b/leaderboard_data/HFOpenLLMv2/alibaba/wave-on-discord_qwent-7b/1dc524b8-18d6-4bc0-9146-713ef8abd983.json
deleted file mode 100644
index e0a7281b5462c7f6e082a8f4eacc3b75a44d0932..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/wave-on-discord_qwent-7b/1dc524b8-18d6-4bc0-9146-713ef8abd983.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/wave-on-discord_qwent-7b/1762652580.592784",
- "retrieved_timestamp": "1762652580.592785",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "wave-on-discord/qwent-7b",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "wave-on-discord/qwent-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20148539209297997
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4228103286118343
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0037764350453172208
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38165625000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16032247340425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/win10_EVA-Norns-Qwen2.5-v0.1/5b8044df-ce6a-4a5e-9aed-d657188fa114.json b/leaderboard_data/HFOpenLLMv2/alibaba/win10_EVA-Norns-Qwen2.5-v0.1/5b8044df-ce6a-4a5e-9aed-d657188fa114.json
deleted file mode 100644
index 7f7e56a32ac252a6e18427673d9f959b1470153d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/win10_EVA-Norns-Qwen2.5-v0.1/5b8044df-ce6a-4a5e-9aed-d657188fa114.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/win10_EVA-Norns-Qwen2.5-v0.1/1762652580.594388",
- "retrieved_timestamp": "1762652580.594388",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "win10/EVA-Norns-Qwen2.5-v0.1",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "win10/EVA-Norns-Qwen2.5-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6219630580193884
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.507240838017382
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26132930513595165
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40451041666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3425033244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/win10_Norns-Qwen2.5-12B/4ff2e991-ee62-467e-9fec-cdf334ca7fca.json b/leaderboard_data/HFOpenLLMv2/alibaba/win10_Norns-Qwen2.5-12B/4ff2e991-ee62-467e-9fec-cdf334ca7fca.json
deleted file mode 100644
index f3cfda531f02b3de8f2b446ecd2f0b19bda12e37..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/win10_Norns-Qwen2.5-12B/4ff2e991-ee62-467e-9fec-cdf334ca7fca.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/win10_Norns-Qwen2.5-12B/1762652580.594881",
- "retrieved_timestamp": "1762652580.594882",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "win10/Norns-Qwen2.5-12B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "win10/Norns-Qwen2.5-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48969733640074997
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46189201103923744
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08383685800604229
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3554895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2660405585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 12.277
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/win10_Norns-Qwen2.5-7B/2451252e-2cf6-4394-9009-544630696c75.json b/leaderboard_data/HFOpenLLMv2/alibaba/win10_Norns-Qwen2.5-7B/2451252e-2cf6-4394-9009-544630696c75.json
deleted file mode 100644
index 3c29c9340c529ed110a7efe45e8a7723a28dc757..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/win10_Norns-Qwen2.5-7B/2451252e-2cf6-4394-9009-544630696c75.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/win10_Norns-Qwen2.5-7B/1762652580.5950878",
- "retrieved_timestamp": "1762652580.595089",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "win10/Norns-Qwen2.5-7B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "win10/Norns-Qwen2.5-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6122211288270678
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5072887832228614
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2628398791540785
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40847916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34133976063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alibaba/x0000001_Deepseek-Lumen-R1-Qwen2.5-14B/9d6eb7bc-965e-4de8-bccf-0590ad55ce6d.json b/leaderboard_data/HFOpenLLMv2/alibaba/x0000001_Deepseek-Lumen-R1-Qwen2.5-14B/9d6eb7bc-965e-4de8-bccf-0590ad55ce6d.json
deleted file mode 100644
index f5e00a60b156d32cc754db477e870d4fa257f2d2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alibaba/x0000001_Deepseek-Lumen-R1-Qwen2.5-14B/9d6eb7bc-965e-4de8-bccf-0590ad55ce6d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/x0000001_Deepseek-Lumen-R1-Qwen2.5-14B/1762652580.596637",
- "retrieved_timestamp": "1762652580.596638",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "x0000001/Deepseek-Lumen-R1-Qwen2.5-14B",
- "developer": "alibaba",
- "inference_platform": "unknown",
- "id": "x0000001/Deepseek-Lumen-R1-Qwen2.5-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4436107306391486
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45690468424066283
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27794561933534745
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47396875000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4379155585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-70B-DPO/b790e9c5-2412-4aa0-a975-37b8662a82cf.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-70B-DPO/b790e9c5-2412-4aa0-a975-37b8662a82cf.json
deleted file mode 100644
index 570d57724e141158892cb45157e9bb6fcc19afb5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-70B-DPO/b790e9c5-2412-4aa0-a975-37b8662a82cf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-70B-DPO/1762652579.9821",
- "retrieved_timestamp": "1762652579.982101",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allenai/Llama-3.1-Tulu-3-70B-DPO",
- "developer": "allenai",
- "inference_platform": "unknown",
- "id": "allenai/Llama-3.1-Tulu-3-70B-DPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8281925291559729
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6146203626958501
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44939577039274925
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37583892617449666
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4922604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4632646276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-70B-SFT/6921281e-5756-4f0d-a37c-3b05ff6b2703.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-70B-SFT/6921281e-5756-4f0d-a37c-3b05ff6b2703.json
deleted file mode 100644
index 7905f87c57c9e677e1ee864a429c7d291c819401..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-70B-SFT/6921281e-5756-4f0d-a37c-3b05ff6b2703.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-70B-SFT/1762652579.982346",
- "retrieved_timestamp": "1762652579.982346",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allenai/Llama-3.1-Tulu-3-70B-SFT",
- "developer": "allenai",
- "inference_platform": "unknown",
- "id": "allenai/Llama-3.1-Tulu-3-70B-SFT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8050616807847621
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5951437800580934
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33157099697885195
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3447986577181208
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5026145833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46243351063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-8B-DPO/81bd1edf-be5b-4ae6-a2cc-723aaa040eb9.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-8B-DPO/81bd1edf-be5b-4ae6-a2cc-723aaa040eb9.json
deleted file mode 100644
index 6d43708bec77d22e403250d49a632b9e20a10751..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-8B-DPO/81bd1edf-be5b-4ae6-a2cc-723aaa040eb9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B-DPO/1762652579.9829278",
- "retrieved_timestamp": "1762652579.982929",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allenai/Llama-3.1-Tulu-3-8B-DPO",
- "developer": "allenai",
- "inference_platform": "unknown",
- "id": "allenai/Llama-3.1-Tulu-3-8B-DPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8029384255996312
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4079428557044153
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.236404833836858
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41613541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2898105053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-8B-SFT/35674acb-a68c-4ac1-9aac-ac9cb44801e6.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-8B-SFT/35674acb-a68c-4ac1-9aac-ac9cb44801e6.json
deleted file mode 100644
index 736b53a5e051744117d64774cee528a8556d91a3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_Llama-3.1-Tulu-3-8B-SFT/35674acb-a68c-4ac1-9aac-ac9cb44801e6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allenai_Llama-3.1-Tulu-3-8B-SFT/1762652579.983397",
- "retrieved_timestamp": "1762652579.983398",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allenai/Llama-3.1-Tulu-3-8B-SFT",
- "developer": "allenai",
- "inference_platform": "unknown",
- "id": "allenai/Llama-3.1-Tulu-3-8B-SFT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7403400754442657
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3871863270501647
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11782477341389729
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4267708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28116688829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-1.7-7B-hf/5d7caae7-0242-4a5d-b3be-c677b958d130.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-1.7-7B-hf/5d7caae7-0242-4a5d-b3be-c677b958d130.json
deleted file mode 100644
index 167c2b63d0e1e6452c2ad64721cdad8468c0eb7b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-1.7-7B-hf/5d7caae7-0242-4a5d-b3be-c677b958d130.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allenai_OLMo-1.7-7B-hf/1762652579.9836009",
- "retrieved_timestamp": "1762652579.9836018",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allenai/OLMo-1.7-7B-hf",
- "developer": "allenai",
- "inference_platform": "unknown",
- "id": "allenai/OLMo-1.7-7B-hf"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1568970332052288
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3013695911207614
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0022658610271903325
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2550335570469799
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34748958333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11236702127659574
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Unknown",
- "params_billions": 0.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-1B-hf/d13f5416-1d95-431b-8f01-b969066ec960.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-1B-hf/d13f5416-1d95-431b-8f01-b969066ec960.json
deleted file mode 100644
index b986f095113ea0c317a3e4fac048bbea02896b9f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-1B-hf/d13f5416-1d95-431b-8f01-b969066ec960.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allenai_OLMo-1B-hf/1762652579.983823",
- "retrieved_timestamp": "1762652579.983823",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allenai/OLMo-1B-hf",
- "developer": "allenai",
- "inference_platform": "unknown",
- "id": "allenai/OLMo-1B-hf"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21819660722438686
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30519468988429327
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.017371601208459216
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40978125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11735372340425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "OlmoForCausalLM",
- "params_billions": 1.177
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-2-1124-7B-Instruct/17df660f-6a91-476f-a7e8-7169eef1c24d.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-2-1124-7B-Instruct/17df660f-6a91-476f-a7e8-7169eef1c24d.json
deleted file mode 100644
index ccfccbab9d7a51f8aa2fc5d0864a02e6bf375f48..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-2-1124-7B-Instruct/17df660f-6a91-476f-a7e8-7169eef1c24d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allenai_OLMo-2-1124-7B-Instruct/1762652579.9840362",
- "retrieved_timestamp": "1762652579.9840372",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allenai/OLMo-2-1124-7B-Instruct",
- "developer": "allenai",
- "inference_platform": "unknown",
- "id": "allenai/OLMo-2-1124-7B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7244034716773715
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40223602474417786
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1487915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35083333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2672041223404255
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Olmo2ForCausalLM",
- "params_billions": 7.299
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-7B-Instruct-hf/7ff78ffd-c934-4a17-b30d-2d8267f3e25a.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-7B-Instruct-hf/7ff78ffd-c934-4a17-b30d-2d8267f3e25a.json
deleted file mode 100644
index a9e86c4b2d282e42473de60f2d04353f76049634..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-7B-Instruct-hf/7ff78ffd-c934-4a17-b30d-2d8267f3e25a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allenai_OLMo-7B-Instruct-hf/1762652579.98445",
- "retrieved_timestamp": "1762652579.984452",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allenai/OLMo-7B-Instruct-hf",
- "developer": "allenai",
- "inference_platform": "unknown",
- "id": "allenai/OLMo-7B-Instruct-hf"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3472652561869174
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3706469866662716
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.013595166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37647916666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17852393617021275
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "OlmoForCausalLM",
- "params_billions": 7.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-7B-hf/6308f97d-aecd-467a-91f0-5a1650ccc22a.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-7B-hf/6308f97d-aecd-467a-91f0-5a1650ccc22a.json
deleted file mode 100644
index 3e9e0c916d208d022edc5b521766d72f451df1ba..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMo-7B-hf/6308f97d-aecd-467a-91f0-5a1650ccc22a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allenai_OLMo-7B-hf/1762652579.984753",
- "retrieved_timestamp": "1762652579.984753",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allenai/OLMo-7B-hf",
- "developer": "allenai",
- "inference_platform": "unknown",
- "id": "allenai/OLMo-7B-hf"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2719273749207658
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32791316587362274
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.012084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3486666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11727061170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "OlmoForCausalLM",
- "params_billions": 6.888
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMoE-1B-7B-0125-Instruct/af176c4c-b06f-44ac-bcba-1331d9148958.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMoE-1B-7B-0125-Instruct/af176c4c-b06f-44ac-bcba-1331d9148958.json
deleted file mode 100644
index ac6d05767eb643a4ddb192ef0f39533ed757874d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMoE-1B-7B-0125-Instruct/af176c4c-b06f-44ac-bcba-1331d9148958.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allenai_OLMoE-1B-7B-0125-Instruct/1762652579.984983",
- "retrieved_timestamp": "1762652579.984983",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allenai/OLMoE-1B-7B-0125-Instruct",
- "developer": "allenai",
- "inference_platform": "unknown",
- "id": "allenai/OLMoE-1B-7B-0125-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6757436934001781
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38245348916008676
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08987915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3635833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19148936170212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "OlmoeForCausalLM",
- "params_billions": 6.919
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMoE-1B-7B-0924-Instruct/a580b690-0829-43b9-8d52-6dd226208901.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMoE-1B-7B-0924-Instruct/a580b690-0829-43b9-8d52-6dd226208901.json
deleted file mode 100644
index 5545b69e092f9ca39c90eb203cf37300decf471f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMoE-1B-7B-0924-Instruct/a580b690-0829-43b9-8d52-6dd226208901.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allenai_OLMoE-1B-7B-0924-Instruct/1762652579.98542",
- "retrieved_timestamp": "1762652579.98542",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allenai/OLMoE-1B-7B-0924-Instruct",
- "developer": "allenai",
- "inference_platform": "unknown",
- "id": "allenai/OLMoE-1B-7B-0924-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4667415790103592
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3901610626816106
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.027945619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3848229166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18758311170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "OlmoeForCausalLM",
- "params_billions": 6.919
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMoE-1B-7B-0924/af1bb542-77cb-47e2-89f1-16cc91e89452.json b/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMoE-1B-7B-0924/af1bb542-77cb-47e2-89f1-16cc91e89452.json
deleted file mode 100644
index c6ede33c39b1f7ba0fb14e24cc54167a2616eae8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allenai/allenai_OLMoE-1B-7B-0924/af1bb542-77cb-47e2-89f1-16cc91e89452.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allenai_OLMoE-1B-7B-0924/1762652579.985209",
- "retrieved_timestamp": "1762652579.9852102",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allenai/OLMoE-1B-7B-0924",
- "developer": "allenai",
- "inference_platform": "unknown",
- "id": "allenai/OLMoE-1B-7B-0924"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21847143357402804
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3393437931177341
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01661631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24748322147651006
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34879166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1739527925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "OlmoeForCausalLM",
- "params_billions": 6.919
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Chocolatine-24B/9d3d89f9-e792-4b33-91d1-41f84ca1cc68.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Chocolatine-24B/9d3d89f9-e792-4b33-91d1-41f84ca1cc68.json
deleted file mode 100644
index 75060bc0ecdbda92bd446579f94a9c64669e9909..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Chocolatine-24B/9d3d89f9-e792-4b33-91d1-41f84ca1cc68.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Chocolatine-24B/1762652579.9856288",
- "retrieved_timestamp": "1762652579.98563",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Chocolatine-24B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Chocolatine-24B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19581488229010136
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6191260063262436
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0007552870090634441
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32550335570469796
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43232291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4566156914893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 24.184
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp1-7B/340dfc7b-9af0-4545-9d7b-6950ea69bd57.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp1-7B/340dfc7b-9af0-4545-9d7b-6950ea69bd57.json
deleted file mode 100644
index adc973a8ed443cb81f444d7ffb948602a0c17932..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp1-7B/340dfc7b-9af0-4545-9d7b-6950ea69bd57.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp1-7B/1762652579.988248",
- "retrieved_timestamp": "1762652579.988249",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/HomerSlerp1-7B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/HomerSlerp1-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46212050692163464
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.551818027489446
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2719033232628399
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179530201342282
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43585416666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4503823138297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp2-7B/ea9cc238-75d0-45e7-b10e-e214516ca36e.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp2-7B/ea9cc238-75d0-45e7-b10e-e214516ca36e.json
deleted file mode 100644
index 2ff386ed6ac2775beef895830a5cd894f3b74d57..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp2-7B/ea9cc238-75d0-45e7-b10e-e214516ca36e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp2-7B/1762652579.988459",
- "retrieved_timestamp": "1762652579.98846",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/HomerSlerp2-7B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/HomerSlerp2-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44868172005833407
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5648943315947
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29682779456193353
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43557291666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45146276595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp3-7B/a8a69b0c-02c9-437d-975d-69f1ddc6959a.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp3-7B/a8a69b0c-02c9-437d-975d-69f1ddc6959a.json
deleted file mode 100644
index 50e886a4f316469757e6acb1890a0a54afd8501c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp3-7B/a8a69b0c-02c9-437d-975d-69f1ddc6959a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp3-7B/1762652579.988729",
- "retrieved_timestamp": "1762652579.9887302",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/HomerSlerp3-7B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/HomerSlerp3-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4362668829815999
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5598063466560873
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3021148036253776
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31711409395973156
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44617708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45345744680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp4-7B/988da677-c00d-4e7c-847e-6ca553e0124b.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp4-7B/988da677-c00d-4e7c-847e-6ca553e0124b.json
deleted file mode 100644
index fd6785d19ed3a6c0dbabfb0c0adc9cb5cb858cf5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_HomerSlerp4-7B/988da677-c00d-4e7c-847e-6ca553e0124b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_HomerSlerp4-7B/1762652579.988936",
- "retrieved_timestamp": "1762652579.988937",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/HomerSlerp4-7B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/HomerSlerp4-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43741605606457534
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5570767234678723
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3270392749244713
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44084375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44722406914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_LimyQstar-7B-slerp/ac45b8ec-454f-4a91-9418-a3dc70535119.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_LimyQstar-7B-slerp/ac45b8ec-454f-4a91-9418-a3dc70535119.json
deleted file mode 100644
index 9e97b0bd249b7ddf456363f3f13695ccbb4d1b3d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_LimyQstar-7B-slerp/ac45b8ec-454f-4a91-9418-a3dc70535119.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_LimyQstar-7B-slerp/1762652579.98914",
- "retrieved_timestamp": "1762652579.989141",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/LimyQstar-7B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/LimyQstar-7B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34911368502240725
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5023559424245442
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06873111782477341
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4146458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3103390957446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Marco-01-slerp1-7B/1b8abf32-6b66-4e9b-9b82-e1978d07a483.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Marco-01-slerp1-7B/1b8abf32-6b66-4e9b-9b82-e1978d07a483.json
deleted file mode 100644
index d5a360d486905bd4dd42cf8aa0552c795883bee8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Marco-01-slerp1-7B/1b8abf32-6b66-4e9b-9b82-e1978d07a483.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Marco-01-slerp1-7B/1762652579.989768",
- "retrieved_timestamp": "1762652579.98977",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Marco-01-slerp1-7B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Marco-01-slerp1-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46811571075856506
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5540943469864194
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3157099697885196
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31711409395973156
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4451875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44830452127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Meme-7B-slerp/8eaa7d3f-0217-4ed3-9367-9e0f9c0926fe.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Meme-7B-slerp/8eaa7d3f-0217-4ed3-9367-9e0f9c0926fe.json
deleted file mode 100644
index 87b5f1c807864a14df74c90bab03bbfc003ee9b3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Meme-7B-slerp/8eaa7d3f-0217-4ed3-9367-9e0f9c0926fe.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Meme-7B-slerp/1762652579.9900281",
- "retrieved_timestamp": "1762652579.990029",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Meme-7B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Meme-7B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5163754393897082
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4660944195552204
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04380664652567976
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2860738255033557
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4223020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.281000664893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ministral-8B-slerp/effba194-3b2a-4847-9708-e3cb62a7c964.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ministral-8B-slerp/effba194-3b2a-4847-9708-e3cb62a7c964.json
deleted file mode 100644
index e0c5b7973ea6fe25881d0edfa6615809240ca0ba..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ministral-8B-slerp/effba194-3b2a-4847-9708-e3cb62a7c964.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Ministral-8B-slerp/1762652579.990243",
- "retrieved_timestamp": "1762652579.9902442",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Ministral-8B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Ministral-8B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19608970863974257
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4686018544963986
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0037764350453172208
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42853125000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3119182180851064
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MixTAO-19B-pass/275fb96e-4779-479b-937b-f5db6aa530ea.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MixTAO-19B-pass/275fb96e-4779-479b-937b-f5db6aa530ea.json
deleted file mode 100644
index 66843533d540d1290cd9bb0500f31926ee2b4a07..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MixTAO-19B-pass/275fb96e-4779-479b-937b-f5db6aa530ea.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_MixTAO-19B-pass/1762652579.991234",
- "retrieved_timestamp": "1762652579.991235",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/MixTAO-19B-pass",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/MixTAO-19B-pass"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3814368098866563
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5128248798224987
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06117824773413897
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47827083333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31050531914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 19.188
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MixTaoTruthful-13B-slerp/003c05a1-abb7-41d3-a264-efc6923b64ef.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MixTaoTruthful-13B-slerp/003c05a1-abb7-41d3-a264-efc6923b64ef.json
deleted file mode 100644
index c195b1ec8b99826f7e68ce972a9ab0b84b4b33da..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MixTaoTruthful-13B-slerp/003c05a1-abb7-41d3-a264-efc6923b64ef.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_MixTaoTruthful-13B-slerp/1762652579.991453",
- "retrieved_timestamp": "1762652579.991454",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/MixTaoTruthful-13B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/MixTaoTruthful-13B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41388515804731446
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5207335343585151
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42924999999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3100066489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 12.879
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiCalm-7B-slerp/36176ae9-e852-4604-9961-b7f02e4c3e55.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiCalm-7B-slerp/36176ae9-e852-4604-9961-b7f02e4c3e55.json
deleted file mode 100644
index 7be0632d917d103debdbc12eeda2ef187c37d485..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiCalm-7B-slerp/36176ae9-e852-4604-9961-b7f02e4c3e55.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_MultiCalm-7B-slerp/1762652579.991671",
- "retrieved_timestamp": "1762652579.991672",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/MultiCalm-7B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/MultiCalm-7B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3926526061960044
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5121891599770304
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.061933534743202415
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43194791666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3032746010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash-12B-slerp/ed27cd90-e73f-4432-aed9-dd36f29cba1a.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash-12B-slerp/ed27cd90-e73f-4432-aed9-dd36f29cba1a.json
deleted file mode 100644
index 4b1e940114aff2a59ea0038fa09d7011a94dc3d4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash-12B-slerp/ed27cd90-e73f-4432-aed9-dd36f29cba1a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash-12B-slerp/1762652579.991891",
- "retrieved_timestamp": "1762652579.9918919",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/MultiMash-12B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/MultiMash-12B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39744876926554873
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5141827379810838
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08081570996978851
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27684563758389263
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44379166666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3067652925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 12.879
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash10-13B-slerp/7e4b1f44-73f9-4a6d-9d66-91c60e69e3d2.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash10-13B-slerp/7e4b1f44-73f9-4a6d-9d66-91c60e69e3d2.json
deleted file mode 100644
index e05563b6b1954ae33471e02a3c4d6b861c166875..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash10-13B-slerp/7e4b1f44-73f9-4a6d-9d66-91c60e69e3d2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash10-13B-slerp/1762652579.992115",
- "retrieved_timestamp": "1762652579.992116",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/MultiMash10-13B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/MultiMash10-13B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41628323958208663
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5186335995744094
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07175226586102719
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2860738255033557
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43179166666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3116688829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 12.879
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash11-13B-slerp/1b3bfb2a-8290-4af0-bdac-24397a5b6f86.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash11-13B-slerp/1b3bfb2a-8290-4af0-bdac-24397a5b6f86.json
deleted file mode 100644
index 4ede973cae22eb8e8277a149c0af949c569f7686..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash11-13B-slerp/1b3bfb2a-8290-4af0-bdac-24397a5b6f86.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash11-13B-slerp/1762652579.992343",
- "retrieved_timestamp": "1762652579.9923441",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/MultiMash11-13B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/MultiMash11-13B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4251009543566625
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5193864686484946
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0702416918429003
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43728125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30851063829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 12.879
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash2-12B-slerp/af52a422-e959-4662-98e8-c94fa83bee3e.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash2-12B-slerp/af52a422-e959-4662-98e8-c94fa83bee3e.json
deleted file mode 100644
index 2a5d0962bb8f911cfdd279f27cb76fe17b22da91..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash2-12B-slerp/af52a422-e959-4662-98e8-c94fa83bee3e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash2-12B-slerp/1762652579.992556",
- "retrieved_timestamp": "1762652579.992556",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/MultiMash2-12B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/MultiMash2-12B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42607503645881817
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5133973498532299
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06419939577039276
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4228020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3042719414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 12.879
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash5-12B-slerp/df7621bc-5af2-45c5-b8e4-ebc158dad966.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash5-12B-slerp/df7621bc-5af2-45c5-b8e4-ebc158dad966.json
deleted file mode 100644
index 28ce8261d791abf981e0ba02c16267b12e81df91..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash5-12B-slerp/df7621bc-5af2-45c5-b8e4-ebc158dad966.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash5-12B-slerp/1762652579.992772",
- "retrieved_timestamp": "1762652579.992772",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/MultiMash5-12B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/MultiMash5-12B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41415998439695567
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5144534995858502
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0634441087613293
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4202916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30277593085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 12.879
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash6-12B-slerp/195b1c31-c766-479c-a445-39a6150404fc.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash6-12B-slerp/195b1c31-c766-479c-a445-39a6150404fc.json
deleted file mode 100644
index 064be03c1e720f03a3bcb05909a6d133507baa66..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash6-12B-slerp/195b1c31-c766-479c-a445-39a6150404fc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash6-12B-slerp/1762652579.992992",
- "retrieved_timestamp": "1762652579.992993",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/MultiMash6-12B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/MultiMash6-12B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43004672047943904
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5195916915718951
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07250755287009064
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4305833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30909242021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 12.879
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash7-12B-slerp/141507b5-67df-4c38-9eeb-b9d3cf98b08f.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash7-12B-slerp/141507b5-67df-4c38-9eeb-b9d3cf98b08f.json
deleted file mode 100644
index f83395c6127cd2a60be2cb3939ad76d2825b6180..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash7-12B-slerp/141507b5-67df-4c38-9eeb-b9d3cf98b08f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash7-12B-slerp/1762652579.993205",
- "retrieved_timestamp": "1762652579.993206",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/MultiMash7-12B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/MultiMash7-12B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42127887338927383
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5111135397195524
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06948640483383686
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42794791666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3029421542553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 12.879
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash8-13B-slerp/54a836bc-8048-4c2b-a65a-937acc2fa414.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash8-13B-slerp/54a836bc-8048-4c2b-a65a-937acc2fa414.json
deleted file mode 100644
index a4512a92e89d8b79dd02bcdd13a20e7997adf564..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash8-13B-slerp/54a836bc-8048-4c2b-a65a-937acc2fa414.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash8-13B-slerp/1762652579.9938078",
- "retrieved_timestamp": "1762652579.99381",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/MultiMash8-13B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/MultiMash8-13B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4320702402957486
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5178483059643324
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0770392749244713
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28859060402684567
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4423958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31258311170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 12.879
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash9-13B-slerp/6a0f5973-6377-4707-a0e3-414ca1f22b32.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash9-13B-slerp/6a0f5973-6377-4707-a0e3-414ca1f22b32.json
deleted file mode 100644
index 73796c0137c5421befee6ce520161ec7c4ee07f2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMash9-13B-slerp/6a0f5973-6377-4707-a0e3-414ca1f22b32.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMash9-13B-slerp/1762652579.994061",
- "retrieved_timestamp": "1762652579.994061",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/MultiMash9-13B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/MultiMash9-13B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4187810564856802
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5193579939678727
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07854984894259819
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4398229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3100066489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 12.879
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMerge-7B-slerp/f0aae363-f838-48c8-bf9e-b8e9f0e84a24.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMerge-7B-slerp/f0aae363-f838-48c8-bf9e-b8e9f0e84a24.json
deleted file mode 100644
index 85a7faca97879786ea26cc99a3b9c245c35e286b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiMerge-7B-slerp/f0aae363-f838-48c8-bf9e-b8e9f0e84a24.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_MultiMerge-7B-slerp/1762652579.994297",
- "retrieved_timestamp": "1762652579.994299",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/MultiMerge-7B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/MultiMerge-7B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3947758613811354
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5140224933103638
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42797916666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036901595744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Multimash3-12B-slerp/80aa0629-7ea1-4f69-b302-c0502abcbbab.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Multimash3-12B-slerp/80aa0629-7ea1-4f69-b302-c0502abcbbab.json
deleted file mode 100644
index 4f3af69c40e382115a8815cf44529b0bc8ded335..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Multimash3-12B-slerp/80aa0629-7ea1-4f69-b302-c0502abcbbab.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Multimash3-12B-slerp/1762652579.994557",
- "retrieved_timestamp": "1762652579.994557",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Multimash3-12B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Multimash3-12B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44371046600796993
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5176624678276028
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06268882175226587
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4343958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3067652925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 12.879
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Multimerge-19B-pass/818e21b8-da78-4649-a71a-ba71c89d1fe7.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Multimerge-19B-pass/818e21b8-da78-4649-a71a-ba71c89d1fe7.json
deleted file mode 100644
index fc024ad33f064a89a18788fa510b9296bdc118c9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Multimerge-19B-pass/818e21b8-da78-4649-a71a-ba71c89d1fe7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Multimerge-19B-pass/1762652579.9948218",
- "retrieved_timestamp": "1762652579.994823",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Multimerge-19B-pass",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Multimerge-19B-pass"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17730510600761534
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2891778102988436
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3429583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11685505319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 19.188
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiverseEx26-7B-slerp/30b74d3f-7247-4c93-9c94-dc8beba14b70.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiverseEx26-7B-slerp/30b74d3f-7247-4c93-9c94-dc8beba14b70.json
deleted file mode 100644
index d12bd02368cff20b40e3b473947d0a6b02051753..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_MultiverseEx26-7B-slerp/30b74d3f-7247-4c93-9c94-dc8beba14b70.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_MultiverseEx26-7B-slerp/1762652579.995038",
- "retrieved_timestamp": "1762652579.995039",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/MultiverseEx26-7B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/MultiverseEx26-7B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3938516469633905
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5133591871690678
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0755287009063444
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4293125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3035239361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_NeuralWestSeverus-7B-slerp/fc6d4451-0a9c-4d53-8d22-179ff7059d61.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_NeuralWestSeverus-7B-slerp/fc6d4451-0a9c-4d53-8d22-179ff7059d61.json
deleted file mode 100644
index d9511a3b9fadd5fff14699809db9161160d15108..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_NeuralWestSeverus-7B-slerp/fc6d4451-0a9c-4d53-8d22-179ff7059d61.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_NeuralWestSeverus-7B-slerp/1762652579.995253",
- "retrieved_timestamp": "1762652579.995254",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/NeuralWestSeverus-7B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/NeuralWestSeverus-7B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41356046401326263
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5244283854305991
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07326283987915408
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45287499999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3137466755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Neuralcoven-7B-slerp/ba46f82b-2129-43db-ae21-09e6576dc4e6.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Neuralcoven-7B-slerp/ba46f82b-2129-43db-ae21-09e6576dc4e6.json
deleted file mode 100644
index 3267ae27582d5de8ac48faf122230d20b2aad2c9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Neuralcoven-7B-slerp/ba46f82b-2129-43db-ae21-09e6576dc4e6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Neuralcoven-7B-slerp/1762652579.995681",
- "retrieved_timestamp": "1762652579.995682",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Neuralcoven-7B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Neuralcoven-7B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3858584112377381
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.530287217712165
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07854984894259819
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.429
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3293716755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Neuralmultiverse-7B-slerp/b98b76ea-b068-46ec-b929-4ca1037eaf99.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Neuralmultiverse-7B-slerp/b98b76ea-b068-46ec-b929-4ca1037eaf99.json
deleted file mode 100644
index 732b7e8c1f336cd162988d63c6325d78c1a77ff7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Neuralmultiverse-7B-slerp/b98b76ea-b068-46ec-b929-4ca1037eaf99.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Neuralmultiverse-7B-slerp/1762652579.995954",
- "retrieved_timestamp": "1762652579.995955",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Neuralmultiverse-7B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Neuralmultiverse-7B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3769154731667531
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5165722210470375
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0649546827794562
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42804166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30418882978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3della5-14B/d5a47313-b2f5-4833-9539-b8f56e4a5fda.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3della5-14B/d5a47313-b2f5-4833-9539-b8f56e4a5fda.json
deleted file mode 100644
index 4b3cc7c695a3b97dc4afd03f42052f90e21d2ab9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3della5-14B/d5a47313-b2f5-4833-9539-b8f56e4a5fda.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3della5-14B/1762652579.9961941",
- "retrieved_timestamp": "1762652579.996195",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Ph3della5-14B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Ph3della5-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47985567183960776
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6331746353794991
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17673716012084592
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3422818791946309
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4386145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4787234042553192
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 13.96
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3merge-14B/95228f47-8fb1-443c-8ad4-0021504e34e0.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3merge-14B/95228f47-8fb1-443c-8ad4-0021504e34e0.json
deleted file mode 100644
index 422c3d87a755163cc9b66b5e722b3e9eadb0e4d9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3merge-14B/95228f47-8fb1-443c-8ad4-0021504e34e0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3merge-14B/1762652579.996419",
- "retrieved_timestamp": "1762652579.9964201",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Ph3merge-14B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Ph3merge-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27012881376968667
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.638087568868341
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.010574018126888218
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4334375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4611037234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 13.619
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3merge2-14B/b5790fec-6c12-42a3-853c-488658bf949d.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3merge2-14B/b5790fec-6c12-42a3-853c-488658bf949d.json
deleted file mode 100644
index 279c00686f91f438e82c12c3eacf13e5cb0d5b88..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3merge2-14B/b5790fec-6c12-42a3-853c-488658bf949d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3merge2-14B/1762652579.996639",
- "retrieved_timestamp": "1762652579.99664",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Ph3merge2-14B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Ph3merge2-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17061064641817045
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3606937444321621
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3910833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1722905585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 13.619
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3merge3-14B/e5d9bded-a8e4-4133-84b9-6eac517a4226.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3merge3-14B/e5d9bded-a8e4-4133-84b9-6eac517a4226.json
deleted file mode 100644
index 40127acc295b83bfb266ae82753be8659ce5a69b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3merge3-14B/e5d9bded-a8e4-4133-84b9-6eac517a4226.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3merge3-14B/1762652579.99685",
- "retrieved_timestamp": "1762652579.996851",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Ph3merge3-14B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Ph3merge3-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1645157072124186
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3597431731140411
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40819791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16472739361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 13.619
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3task1-14B/718ef6de-5926-4a4c-bade-9a162ce8e730.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3task1-14B/718ef6de-5926-4a4c-bade-9a162ce8e730.json
deleted file mode 100644
index 1b52cda4647ece7070bd4b4d0efcb489818fe8a9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3task1-14B/718ef6de-5926-4a4c-bade-9a162ce8e730.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3task1-14B/1762652579.997059",
- "retrieved_timestamp": "1762652579.99706",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Ph3task1-14B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Ph3task1-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46946435457918323
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.63178060736657
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16691842900302115
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35067114093959734
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45077083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4734042553191489
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 13.96
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3task2-14B/5d818d86-2caf-4b29-9c15-8fa27217de22.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3task2-14B/5d818d86-2caf-4b29-9c15-8fa27217de22.json
deleted file mode 100644
index 719f0a115af0487d4b118c1fbef03432fa471163..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3task2-14B/5d818d86-2caf-4b29-9c15-8fa27217de22.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3task2-14B/1762652579.99728",
- "retrieved_timestamp": "1762652579.997281",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Ph3task2-14B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Ph3task2-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4713127834146731
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6098412220695854
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14652567975830816
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4535
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44597739361702127
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 13.96
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3task3-14B/a935c0d1-6623-45c6-a100-96c8b5a3a2fb.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3task3-14B/a935c0d1-6623-45c6-a100-96c8b5a3a2fb.json
deleted file mode 100644
index 51e1df9078b7574d4d7789702bc244cab87d37f9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3task3-14B/a935c0d1-6623-45c6-a100-96c8b5a3a2fb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3task3-14B/1762652579.997498",
- "retrieved_timestamp": "1762652579.997499",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Ph3task3-14B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Ph3task3-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4962421929369628
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6297915743094921
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17598187311178248
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3414429530201342
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44255208333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47706117021276595
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 13.96
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3unsloth-3B-slerp/0a9be33a-792e-413c-b60d-3e97a060fa78.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3unsloth-3B-slerp/0a9be33a-792e-413c-b60d-3e97a060fa78.json
deleted file mode 100644
index 405c47e04fdbed0aabbfd91fbd46aa496bdb1d5e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Ph3unsloth-3B-slerp/0a9be33a-792e-413c-b60d-3e97a060fa78.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Ph3unsloth-3B-slerp/1762652579.99772",
- "retrieved_timestamp": "1762652579.99772",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Ph3unsloth-3B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Ph3unsloth-3B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18944511673470835
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5468077356147099
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10120845921450151
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45278124999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3700964095744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 3.821
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Quen2-65B/4bc3f55b-0638-4fc2-b1d9-04780707acef.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Quen2-65B/4bc3f55b-0638-4fc2-b1d9-04780707acef.json
deleted file mode 100644
index 75a207e17116750d26a721993ee9eb092a86862b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Quen2-65B/4bc3f55b-0638-4fc2-b1d9-04780707acef.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Quen2-65B/1762652579.9981499",
- "retrieved_timestamp": "1762652579.9981499",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Quen2-65B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Quen2-65B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17578137120617737
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27565161872324456
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23573825503355705
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32085416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11136968085106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 63.923
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_RogerMerge-7B-slerp/50289a8b-4522-4dca-b6dc-aa42193deefa.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_RogerMerge-7B-slerp/50289a8b-4522-4dca-b6dc-aa42193deefa.json
deleted file mode 100644
index 54519ce30ad465d976b1ef3a4d86bac8383fa431..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_RogerMerge-7B-slerp/50289a8b-4522-4dca-b6dc-aa42193deefa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_RogerMerge-7B-slerp/1762652580.002474",
- "retrieved_timestamp": "1762652580.002475",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/RogerMerge-7B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/RogerMerge-7B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39330199426410817
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5160176493085935
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06873111782477341
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43197916666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30302526595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Strangecoven-7B-slerp/f125c8d1-57f3-4b79-ace4-2104b008a507.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Strangecoven-7B-slerp/f125c8d1-57f3-4b79-ace4-2104b008a507.json
deleted file mode 100644
index 3b14accda98ba1b21f530f5cb507a3c39dff0d5d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Strangecoven-7B-slerp/f125c8d1-57f3-4b79-ace4-2104b008a507.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Strangecoven-7B-slerp/1762652580.002888",
- "retrieved_timestamp": "1762652580.002889",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Strangecoven-7B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Strangecoven-7B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37464261492839
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5368022290282338
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07628398791540786
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4198854166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33643617021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Weirdslerp2-25B/61e517f7-e2db-48bd-8f4e-f62b5859b62e.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Weirdslerp2-25B/61e517f7-e2db-48bd-8f4e-f62b5859b62e.json
deleted file mode 100644
index 439f5d6a4dead5f8045d3c5f3b7114db20673605..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Weirdslerp2-25B/61e517f7-e2db-48bd-8f4e-f62b5859b62e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Weirdslerp2-25B/1762652580.00309",
- "retrieved_timestamp": "1762652580.0030909",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Weirdslerp2-25B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Weirdslerp2-25B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1754068094877148
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2873695911207614
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24916107382550334
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3523541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11278257978723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 25.204
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_WestlakeMaziyar-7B-slerp/2db948db-a9e5-41cf-9567-2f9198d80900.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_WestlakeMaziyar-7B-slerp/2db948db-a9e5-41cf-9567-2f9198d80900.json
deleted file mode 100644
index 2d9780ebf41d217f8e830f0c2f4fa2553bd94ccf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_WestlakeMaziyar-7B-slerp/2db948db-a9e5-41cf-9567-2f9198d80900.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_WestlakeMaziyar-7B-slerp/1762652580.003291",
- "retrieved_timestamp": "1762652580.0032918",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/WestlakeMaziyar-7B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/WestlakeMaziyar-7B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48377748817581795
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5245479952765804
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44738541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3077626329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_YamMaths-7B-slerp/52ab1e94-4e6f-4876-932b-a45a033dec1b.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_YamMaths-7B-slerp/52ab1e94-4e6f-4876-932b-a45a033dec1b.json
deleted file mode 100644
index 73dff62b9b470f9cd03050b92eb7839109c52539..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_YamMaths-7B-slerp/52ab1e94-4e6f-4876-932b-a45a033dec1b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_YamMaths-7B-slerp/1762652580.003488",
- "retrieved_timestamp": "1762652580.003489",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/YamMaths-7B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/YamMaths-7B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4148093724650594
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5155845857281723
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08534743202416918
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43836458333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3130817819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yi-1.5-34B/98455065-72e1-4dad-bce1-1c3ceddf5433.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yi-1.5-34B/98455065-72e1-4dad-bce1-1c3ceddf5433.json
deleted file mode 100644
index fee1804ebf001a3845a7ae9177d6742ce4d1e27d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yi-1.5-34B/98455065-72e1-4dad-bce1-1c3ceddf5433.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Yi-1.5-34B/1762652580.0036852",
- "retrieved_timestamp": "1762652580.003686",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Yi-1.5-34B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Yi-1.5-34B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16391618682872555
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28272506287695653
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38565625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10954122340425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yi-blossom-40B/b35eaca2-0f77-4171-bbcf-23a191b055f2.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yi-blossom-40B/b35eaca2-0f77-4171-bbcf-23a191b055f2.json
deleted file mode 100644
index c2d5fae60697662dde37a21171ef63c2e19ecb38..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yi-blossom-40B/b35eaca2-0f77-4171-bbcf-23a191b055f2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Yi-blossom-40B/1762652580.004046",
- "retrieved_timestamp": "1762652580.0040479",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Yi-blossom-40B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Yi-blossom-40B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20088587170928693
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32150442258143547
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3842604166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10804521276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 18.769
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yibuddy-35B/dc2688b9-9dff-4a2e-b3d8-3bdc82634d20.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yibuddy-35B/dc2688b9-9dff-4a2e-b3d8-3bdc82634d20.json
deleted file mode 100644
index 4ce44f0d067b06a8c7fd4262dd1d4a43e1c0cdc3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yibuddy-35B/dc2688b9-9dff-4a2e-b3d8-3bdc82634d20.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Yibuddy-35B/1762652580.004411",
- "retrieved_timestamp": "1762652580.004412",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Yibuddy-35B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Yibuddy-35B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4234774841864032
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5916185369526096
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15709969788519637
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35570469798657717
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45045833333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44888630319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yislerp-34B/723d2f60-f12a-4abb-9061-807fd38e7d51.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yislerp-34B/723d2f60-f12a-4abb-9061-807fd38e7d51.json
deleted file mode 100644
index 0d29f7e54653e498212ee2b1a80f130899e082ec..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yislerp-34B/723d2f60-f12a-4abb-9061-807fd38e7d51.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Yislerp-34B/1762652580.0049741",
- "retrieved_timestamp": "1762652580.004975",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Yislerp-34B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Yislerp-34B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3691970637907419
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6158722731484186
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21601208459214502
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35822147651006714
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.456625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4751496010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yislerp2-34B/ce55aca1-80bd-4711-ad05-d812d206bd14.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yislerp2-34B/ce55aca1-80bd-4711-ad05-d812d206bd14.json
deleted file mode 100644
index d1528c2b3cd2c58e3d040316460df4ae9fd5d136..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yislerp2-34B/ce55aca1-80bd-4711-ad05-d812d206bd14.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Yislerp2-34B/1762652580.005196",
- "retrieved_timestamp": "1762652580.005197",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Yislerp2-34B",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Yislerp2-34B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39994658616914236
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6245771970170245
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.229607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3640939597315436
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45296875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.472406914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yunconglong-13B-slerp/8ae47af1-5ae6-4cb9-ac94-8d70fda5126d.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yunconglong-13B-slerp/8ae47af1-5ae6-4cb9-ac94-8d70fda5126d.json
deleted file mode 100644
index 57becbd06a4ffca27d7f9c1eed906710f611c045..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_Yunconglong-13B-slerp/8ae47af1-5ae6-4cb9-ac94-8d70fda5126d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Yunconglong-13B-slerp/1762652580.005601",
- "retrieved_timestamp": "1762652580.005603",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Yunconglong-13B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/Yunconglong-13B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42417673993891764
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5165807158493828
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.054380664652567974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4160729166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30360704787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 12.879
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_limyClown-7B-slerp/420f8334-c420-4b8f-8853-fea8f4f5ac6d.json b/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_limyClown-7B-slerp/420f8334-c420-4b8f-8853-fea8f4f5ac6d.json
deleted file mode 100644
index 07cd165f20c9e3417edd53be2a6c2660e6b9bbd7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allknowingroger/allknowingroger_limyClown-7B-slerp/420f8334-c420-4b8f-8853-fea8f4f5ac6d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_limyClown-7B-slerp/1762652580.005876",
- "retrieved_timestamp": "1762652580.005877",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/limyClown-7B-slerp",
- "developer": "allknowingroger",
- "inference_platform": "unknown",
- "id": "allknowingroger/limyClown-7B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4017451473202215
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5147517317055973
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06873111782477341
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4293125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30377327127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_L3.1-8b-RP-Ink/cb8c45ae-1be6-4ab0-9317-cfbfc8850dc4.json b/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_L3.1-8b-RP-Ink/cb8c45ae-1be6-4ab0-9317-cfbfc8850dc4.json
deleted file mode 100644
index 3fa9328fa1d66d9f4e06bbd92af4a4bb402139c6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_L3.1-8b-RP-Ink/cb8c45ae-1be6-4ab0-9317-cfbfc8850dc4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allura-org_L3.1-8b-RP-Ink/1762652580.006678",
- "retrieved_timestamp": "1762652580.006679",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allura-org/L3.1-8b-RP-Ink",
- "developer": "allura-org",
- "inference_platform": "unknown",
- "id": "allura-org/L3.1-8b-RP-Ink"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7811063533646281
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48284724308518095
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14803625377643503
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3608229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3427526595744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_MN-12b-RP-Ink/3dc6cdf9-e75d-4f9f-9b91-9592e70566f8.json b/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_MN-12b-RP-Ink/3dc6cdf9-e75d-4f9f-9b91-9592e70566f8.json
deleted file mode 100644
index 7378c1ed7638f472eaff0b4065f0e81286aa8719..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_MN-12b-RP-Ink/3dc6cdf9-e75d-4f9f-9b91-9592e70566f8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allura-org_MN-12b-RP-Ink/1762652580.006974",
- "retrieved_timestamp": "1762652580.006975",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allura-org/MN-12b-RP-Ink",
- "developer": "allura-org",
- "inference_platform": "unknown",
- "id": "allura-org/MN-12b-RP-Ink"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7186332265056716
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4833826588550261
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11858006042296072
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38184375000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3513962765957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_MS-Meadowlark-22B/7ea2cf22-114f-449c-a9cf-c4f379646cd3.json b/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_MS-Meadowlark-22B/7ea2cf22-114f-449c-a9cf-c4f379646cd3.json
deleted file mode 100644
index 428d9e6b5653df5d81003b668a9dc255a84f73b7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_MS-Meadowlark-22B/7ea2cf22-114f-449c-a9cf-c4f379646cd3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allura-org_MS-Meadowlark-22B/1762652580.007196",
- "retrieved_timestamp": "1762652580.007197",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allura-org/MS-Meadowlark-22B",
- "developer": "allura-org",
- "inference_platform": "unknown",
- "id": "allura-org/MS-Meadowlark-22B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.669698621878837
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5162576933217772
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18353474320241692
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32550335570469796
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3842604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38231382978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 22.247
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_MoE-Girl-1BA-7BT/5b3176a0-7ded-409a-bc54-70e0ecf9b325.json b/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_MoE-Girl-1BA-7BT/5b3176a0-7ded-409a-bc54-70e0ecf9b325.json
deleted file mode 100644
index 1f85340e533310f48a2d51427049f75bd87079c0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_MoE-Girl-1BA-7BT/5b3176a0-7ded-409a-bc54-70e0ecf9b325.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allura-org_MoE-Girl-1BA-7BT/1762652580.0080209",
- "retrieved_timestamp": "1762652580.008022",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allura-org/MoE-Girl-1BA-7BT",
- "developer": "allura-org",
- "inference_platform": "unknown",
- "id": "allura-org/MoE-Girl-1BA-7BT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27050337548814923
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3139175363262408
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015105740181268883
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34355208333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12175864361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "OlmoeForCausalLM",
- "params_billions": 6.919
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_TQ2.5-14B-Aletheia-v1/b46bef60-b37b-4510-a92a-fb4c0cabb357.json b/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_TQ2.5-14B-Aletheia-v1/b46bef60-b37b-4510-a92a-fb4c0cabb357.json
deleted file mode 100644
index 128adaba22204cc173a6c58d4dfdd6fc61277159..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_TQ2.5-14B-Aletheia-v1/b46bef60-b37b-4510-a92a-fb4c0cabb357.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allura-org_TQ2.5-14B-Aletheia-v1/1762652580.008265",
- "retrieved_timestamp": "1762652580.008276",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allura-org/TQ2.5-14B-Aletheia-v1",
- "developer": "allura-org",
- "inference_platform": "unknown",
- "id": "allura-org/TQ2.5-14B-Aletheia-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7530297388706411
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6585074769185942
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33987915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3624161073825503
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44515625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5241023936170213
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_TQ2.5-14B-Neon-v1/68bdab24-8324-4190-abd2-ad3ad5a7a853.json b/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_TQ2.5-14B-Neon-v1/68bdab24-8324-4190-abd2-ad3ad5a7a853.json
deleted file mode 100644
index e9c11a66c0e195470fd46a27a8e29d1021175556..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_TQ2.5-14B-Neon-v1/68bdab24-8324-4190-abd2-ad3ad5a7a853.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allura-org_TQ2.5-14B-Neon-v1/1762652580.0085812",
- "retrieved_timestamp": "1762652580.0085819",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allura-org/TQ2.5-14B-Neon-v1",
- "developer": "allura-org",
- "inference_platform": "unknown",
- "id": "allura-org/TQ2.5-14B-Neon-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6754189993661264
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.655304131044165
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36027190332326287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3716442953020134
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.461
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5252659574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_Teleut-7b/85ceb275-787a-4dbc-981a-513fd16606ea.json b/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_Teleut-7b/85ceb275-787a-4dbc-981a-513fd16606ea.json
deleted file mode 100644
index 9a3b9bf6dd252bfb489e8a81ccc6b7b7284e9ba0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/allura-org/allura-org_Teleut-7b/85ceb275-787a-4dbc-981a-513fd16606ea.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allura-org_Teleut-7b/1762652580.008814",
- "retrieved_timestamp": "1762652580.008814",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allura-org/Teleut-7b",
- "developer": "allura-org",
- "inference_platform": "unknown",
- "id": "allura-org/Teleut-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6378752820294595
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5141277814496585
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24093655589123866
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3263422818791946
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4640416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4130651595744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/aloobun/aloobun_d-SmolLM2-360M/1ad7b4c4-8074-482e-9010-ce1552325e15.json b/leaderboard_data/HFOpenLLMv2/aloobun/aloobun_d-SmolLM2-360M/1ad7b4c4-8074-482e-9010-ce1552325e15.json
deleted file mode 100644
index f9fb6355bf70b8ceeadd0c7c03bbd1bc3922261c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/aloobun/aloobun_d-SmolLM2-360M/1ad7b4c4-8074-482e-9010-ce1552325e15.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/aloobun_d-SmolLM2-360M/1762652580.0092921",
- "retrieved_timestamp": "1762652580.009293",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "aloobun/d-SmolLM2-360M",
- "developer": "aloobun",
- "inference_platform": "unknown",
- "id": "aloobun/d-SmolLM2-360M"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20970358648386284
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3195784405636826
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01283987915407855
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3980625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11693816489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.362
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alpindale/alpindale_WizardLM-2-8x22B/c2899c4e-5bc9-4b0b-8938-b9848b86fe37.json b/leaderboard_data/HFOpenLLMv2/alpindale/alpindale_WizardLM-2-8x22B/c2899c4e-5bc9-4b0b-8938-b9848b86fe37.json
deleted file mode 100644
index c6bdd758d3a36e402e887af6d0f8b6426f83d8dd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alpindale/alpindale_WizardLM-2-8x22B/c2899c4e-5bc9-4b0b-8938-b9848b86fe37.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/alpindale_WizardLM-2-8x22B/1762652580.009551",
- "retrieved_timestamp": "1762652580.0095518",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "alpindale/WizardLM-2-8x22B",
- "developer": "alpindale",
- "inference_platform": "unknown",
- "id": "alpindale/WizardLM-2-8x22B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5272166739805937
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6377307938917097
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38171140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4387083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45960771276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 140.621
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/alpindale/alpindale_magnum-72b-v1/186687f8-ed25-44c9-b634-36db1c734844.json b/leaderboard_data/HFOpenLLMv2/alpindale/alpindale_magnum-72b-v1/186687f8-ed25-44c9-b634-36db1c734844.json
deleted file mode 100644
index 644786870687d66cbbe731cb4e1e1a0280f88bdf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/alpindale/alpindale_magnum-72b-v1/186687f8-ed25-44c9-b634-36db1c734844.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/alpindale_magnum-72b-v1/1762652580.0098088",
- "retrieved_timestamp": "1762652580.00981",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "alpindale/magnum-72b-v1",
- "developer": "alpindale",
- "inference_platform": "unknown",
- "id": "alpindale/magnum-72b-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7606484128778308
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6982215794373214
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39803625377643503
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39093959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4489375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5467918882978723
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/altomek/altomek_YiSM-34B-0rn/a9c75810-f51d-4fd3-8c96-6afdbc0f278c.json b/leaderboard_data/HFOpenLLMv2/altomek/altomek_YiSM-34B-0rn/a9c75810-f51d-4fd3-8c96-6afdbc0f278c.json
deleted file mode 100644
index 102b9570e46a4e4c2baf441b262f6565aa1a4149..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/altomek/altomek_YiSM-34B-0rn/a9c75810-f51d-4fd3-8c96-6afdbc0f278c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/altomek_YiSM-34B-0rn/1762652580.010027",
- "retrieved_timestamp": "1762652580.0100281",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "altomek/YiSM-34B-0rn",
- "developer": "altomek",
- "inference_platform": "unknown",
- "id": "altomek/YiSM-34B-0rn"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.428373382624769
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6140009573868866
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2280966767371601
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3716442953020134
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.445
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4695811170212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v1-72b/6d98f0fa-25c9-409b-b82e-b3c128bf47b6.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v1-72b/6d98f0fa-25c9-409b-b82e-b3c128bf47b6.json
deleted file mode 100644
index bb64ccfe0cc8217804b53eefe099c6cde2f18db2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v1-72b/6d98f0fa-25c9-409b-b82e-b3c128bf47b6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v1-72b/1762652580.0112262",
- "retrieved_timestamp": "1762652580.011227",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "anthracite-org/magnum-v1-72b",
- "developer": "anthracite-org",
- "inference_platform": "unknown",
- "id": "anthracite-org/magnum-v1-72b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7606484128778308
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6982215794373214
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39803625377643503
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39093959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4489375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5486203457446809
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v2-12b/72821a7d-cc27-4557-82d4-7e30286ea126.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v2-12b/72821a7d-cc27-4557-82d4-7e30286ea126.json
deleted file mode 100644
index c6406bce48723e6529e9394e13d63882304366d3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v2-12b/72821a7d-cc27-4557-82d4-7e30286ea126.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v2-12b/1762652580.011473",
- "retrieved_timestamp": "1762652580.011474",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "anthracite-org/magnum-v2-12b",
- "developer": "anthracite-org",
- "inference_platform": "unknown",
- "id": "anthracite-org/magnum-v2-12b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.376166349729828
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5020864013200114
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.054380664652567974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41790625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31673869680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v2-72b/31d80ab1-348f-4b5a-963e-f027adf32101.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v2-72b/31d80ab1-348f-4b5a-963e-f027adf32101.json
deleted file mode 100644
index 7171ebe2cc702db71e6cabec65d0f6aa9e576842..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v2-72b/31d80ab1-348f-4b5a-963e-f027adf32101.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v2-72b/1762652580.01168",
- "retrieved_timestamp": "1762652580.01168",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "anthracite-org/magnum-v2-72b",
- "developer": "anthracite-org",
- "inference_platform": "unknown",
- "id": "anthracite-org/magnum-v2-72b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7560273407891063
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7005076514129516
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3542296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3859060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4371875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5456283244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v2.5-12b-kto/74e67572-01d9-4890-9c5a-27b5559cf752.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v2.5-12b-kto/74e67572-01d9-4890-9c5a-27b5559cf752.json
deleted file mode 100644
index 6e6ed599185531ac424947eaf58c123f52c68a64..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v2.5-12b-kto/74e67572-01d9-4890-9c5a-27b5559cf752.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v2.5-12b-kto/1762652580.011887",
- "retrieved_timestamp": "1762652580.011888",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "anthracite-org/magnum-v2.5-12b-kto",
- "developer": "anthracite-org",
- "inference_platform": "unknown",
- "id": "anthracite-org/magnum-v2.5-12b-kto"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3865576669902525
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5076961186254344
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05211480362537765
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40863541666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3214760638297872
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v3-27b-kto/9a74a1f1-0322-4f96-8e52-76bbde948fa9.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v3-27b-kto/9a74a1f1-0322-4f96-8e52-76bbde948fa9.json
deleted file mode 100644
index d594f30fbc6585c675fcc4962a9b5a8f390dd6ff..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v3-27b-kto/9a74a1f1-0322-4f96-8e52-76bbde948fa9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-27b-kto/1762652580.012144",
- "retrieved_timestamp": "1762652580.0121448",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "anthracite-org/magnum-v3-27b-kto",
- "developer": "anthracite-org",
- "inference_platform": "unknown",
- "id": "anthracite-org/magnum-v3-27b-kto"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5674831668860845
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.586040577894583
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18126888217522658
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35570469798657717
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38546874999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42378656914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v3-34b/8ace78d5-5390-49ec-935d-2c7faf7569ca.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v3-34b/8ace78d5-5390-49ec-935d-2c7faf7569ca.json
deleted file mode 100644
index 55c2d45a1c2f78f4e821c862b7537d4adc49b4c4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v3-34b/8ace78d5-5390-49ec-935d-2c7faf7569ca.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-34b/1762652580.012352",
- "retrieved_timestamp": "1762652580.012352",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "anthracite-org/magnum-v3-34b",
- "developer": "anthracite-org",
- "inference_platform": "unknown",
- "id": "anthracite-org/magnum-v3-34b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5115294086357531
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6087828692085228
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19486404833836857
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36073825503355705
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3872395833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47523271276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v3-9b-chatml/42df1809-0021-4968-a18b-86cefc0125d7.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v3-9b-chatml/42df1809-0021-4968-a18b-86cefc0125d7.json
deleted file mode 100644
index 3286d66f609439793ad22410d372d8b5b49ebd64..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v3-9b-chatml/42df1809-0021-4968-a18b-86cefc0125d7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-9b-chatml/1762652580.0125592",
- "retrieved_timestamp": "1762652580.0125592",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "anthracite-org/magnum-v3-9b-chatml",
- "developer": "anthracite-org",
- "inference_platform": "unknown",
- "id": "anthracite-org/magnum-v3-9b-chatml"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12747066671985885
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5427688488887096
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06948640483383686
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34563758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4432291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4242021276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-12b/c7ba8947-fd38-4ba1-9169-6c9164123273.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-12b/c7ba8947-fd38-4ba1-9169-6c9164123273.json
deleted file mode 100644
index fb37b306aa09b1cf23957e6dc840422caac4b04e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-12b/c7ba8947-fd38-4ba1-9169-6c9164123273.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-12b/1762652580.013016",
- "retrieved_timestamp": "1762652580.013016",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "anthracite-org/magnum-v4-12b",
- "developer": "anthracite-org",
- "inference_platform": "unknown",
- "id": "anthracite-org/magnum-v4-12b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33929640021808805
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5176693046591915
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11782477341389729
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40928125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3603723404255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-22b/5e3f808c-964d-492d-a003-37594dd36f89.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-22b/5e3f808c-964d-492d-a003-37594dd36f89.json
deleted file mode 100644
index 784a61915ffe4f547a111fa57b71ba5ab697bfc4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-22b/5e3f808c-964d-492d-a003-37594dd36f89.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-22b/1762652580.013223",
- "retrieved_timestamp": "1762652580.013224",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "anthracite-org/magnum-v4-22b",
- "developer": "anthracite-org",
- "inference_platform": "unknown",
- "id": "anthracite-org/magnum-v4-22b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5628620947973599
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.548612004937422
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2001510574018127
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44078124999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3829787234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 22.247
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-27b/113ce0c6-c292-4924-adca-afdbcdd4c381.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-27b/113ce0c6-c292-4924-adca-afdbcdd4c381.json
deleted file mode 100644
index 6c2caa3b15d173fd227375581940017c82ca62e1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-27b/113ce0c6-c292-4924-adca-afdbcdd4c381.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-27b/1762652580.013432",
- "retrieved_timestamp": "1762652580.013433",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "anthracite-org/magnum-v4-27b",
- "developer": "anthracite-org",
- "inference_platform": "unknown",
- "id": "anthracite-org/magnum-v4-27b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34541682735142754
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5867298109891389
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1797583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3699664429530201
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4379895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43758311170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-9b/55401aa6-ad61-42d6-9163-5d105a9091bf.json b/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-9b/55401aa6-ad61-42d6-9163-5d105a9091bf.json
deleted file mode 100644
index 10772be33cd10dd4e008cf2c7e7befc4148f80d4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/anthracite-org/anthracite-org_magnum-v4-9b/55401aa6-ad61-42d6-9163-5d105a9091bf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v4-9b/1762652580.013639",
- "retrieved_timestamp": "1762652580.013639",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "anthracite-org/magnum-v4-9b",
- "developer": "anthracite-org",
- "inference_platform": "unknown",
- "id": "anthracite-org/magnum-v4-9b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3502628581053826
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5336423991931557
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13066465256797583
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34731543624161076
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45157291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3952792553191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/anthropic/xxx777xxxASD_L3.1-ClaudeMaid-4x8B/ae6d070b-71de-40c3-8f69-944ce2e33abb.json b/leaderboard_data/HFOpenLLMv2/anthropic/xxx777xxxASD_L3.1-ClaudeMaid-4x8B/ae6d070b-71de-40c3-8f69-944ce2e33abb.json
deleted file mode 100644
index a9b8f8176be08a31da5a6ce417d3a30a531d05a9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/anthropic/xxx777xxxASD_L3.1-ClaudeMaid-4x8B/ae6d070b-71de-40c3-8f69-944ce2e33abb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/xxx777xxxASD_L3.1-ClaudeMaid-4x8B/1762652580.602767",
- "retrieved_timestamp": "1762652580.602768",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "xxx777xxxASD/L3.1-ClaudeMaid-4x8B",
- "developer": "anthropic",
- "inference_platform": "unknown",
- "id": "xxx777xxxASD/L3.1-ClaudeMaid-4x8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6696487541944263
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5070848048063867
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14123867069486404
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42893749999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35804521276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 24.942
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/apple/apple_DCLM-7B/3891ad0a-0acf-4d3e-a9e8-533633d9557a.json b/leaderboard_data/HFOpenLLMv2/apple/apple_DCLM-7B/3891ad0a-0acf-4d3e-a9e8-533633d9557a.json
deleted file mode 100644
index 2f8794dc36476ecd5f43481152a4d9bd01da8fb8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/apple/apple_DCLM-7B/3891ad0a-0acf-4d3e-a9e8-533633d9557a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/apple_DCLM-7B/1762652580.0138528",
- "retrieved_timestamp": "1762652580.013854",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "apple/DCLM-7B",
- "developer": "apple",
- "inference_platform": "unknown",
- "id": "apple/DCLM-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21727239280664196
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42321423668184166
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03700906344410876
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31543624161073824
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3920729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3110871010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "OpenLMModel",
- "params_billions": 7.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/appvoid/appvoid_arco-2-instruct/95d1d5d9-b613-46b4-b0de-540641d8d81a.json b/leaderboard_data/HFOpenLLMv2/appvoid/appvoid_arco-2-instruct/95d1d5d9-b613-46b4-b0de-540641d8d81a.json
deleted file mode 100644
index 1290f6154bb66918bd21ce8bf564391146f0529c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/appvoid/appvoid_arco-2-instruct/95d1d5d9-b613-46b4-b0de-540641d8d81a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/appvoid_arco-2-instruct/1762652580.014716",
- "retrieved_timestamp": "1762652580.0147169",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "appvoid/arco-2-instruct",
- "developer": "appvoid",
- "inference_platform": "unknown",
- "id": "appvoid/arco-2-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2164479137577184
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31330470624451107
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01283987915407855
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23825503355704697
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34959375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11128656914893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.514
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/appvoid/appvoid_arco-2/a037593c-0f98-4b23-a139-12cfc435de3c.json b/leaderboard_data/HFOpenLLMv2/appvoid/appvoid_arco-2/a037593c-0f98-4b23-a139-12cfc435de3c.json
deleted file mode 100644
index f3456f654e15872ad6a9c42c69ca6589bb58a60c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/appvoid/appvoid_arco-2/a037593c-0f98-4b23-a139-12cfc435de3c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/appvoid_arco-2/1762652580.014345",
- "retrieved_timestamp": "1762652580.014347",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "appvoid/arco-2",
- "developer": "appvoid",
- "inference_platform": "unknown",
- "id": "appvoid/arco-2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19913717824261848
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31456676274830814
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.013595166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23909395973154363
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35359375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1116190159574468
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.514
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Blitz/01e8e033-1aa9-42e2-85d8-b7974d0c9e23.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Blitz/01e8e033-1aa9-42e2-85d8-b7974d0c9e23.json
deleted file mode 100644
index 0e3cd2d5ca4e1e4652c3483c42cdf372541ce53a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Blitz/01e8e033-1aa9-42e2-85d8-b7974d0c9e23.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Blitz/1762652580.0149639",
- "retrieved_timestamp": "1762652580.014965",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "arcee-ai/Arcee-Blitz",
- "developer": "arcee-ai",
- "inference_platform": "unknown",
- "id": "arcee-ai/Arcee-Blitz"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5543435861292482
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6606628431550884
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34818731117824775
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3850671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.50471875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6153590425531915
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 23.572
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Maestro-7B-Preview/126f5eda-1529-450f-8557-dcd6a33b7bd4.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Maestro-7B-Preview/126f5eda-1529-450f-8557-dcd6a33b7bd4.json
deleted file mode 100644
index 170c6167a734fc0dc31da6616a2989e3a6b601f1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Maestro-7B-Preview/126f5eda-1529-450f-8557-dcd6a33b7bd4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Maestro-7B-Preview/1762652580.015253",
- "retrieved_timestamp": "1762652580.015254",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "arcee-ai/Arcee-Maestro-7B-Preview",
- "developer": "arcee-ai",
- "inference_platform": "unknown",
- "id": "arcee-ai/Arcee-Maestro-7B-Preview"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2750247122080524
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4648373015709704
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49924471299093653
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33221476510067116
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3885416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3039394946808511
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Nova/9063608f-8d32-4e98-ad05-621f6239d0ba.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Nova/9063608f-8d32-4e98-ad05-621f6239d0ba.json
deleted file mode 100644
index 6536419adce18ba294f9d2cc68cb6290fe3c94e8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Nova/9063608f-8d32-4e98-ad05-621f6239d0ba.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Nova/1762652580.0154781",
- "retrieved_timestamp": "1762652580.015479",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "arcee-ai/Arcee-Nova",
- "developer": "arcee-ai",
- "inference_platform": "unknown",
- "id": "arcee-ai/Arcee-Nova"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7907485471881275
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.694196965855899
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4380664652567976
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3850671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45616666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5452127659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Spark/1dde2278-39aa-43cf-8d94-5d4a0bb514ca.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Spark/1dde2278-39aa-43cf-8d94-5d4a0bb514ca.json
deleted file mode 100644
index 9cb008bd1ed032e8c5b6d039cfa6425c18f3a45b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Spark/1dde2278-39aa-43cf-8d94-5d4a0bb514ca.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Spark/1762652580.0159192",
- "retrieved_timestamp": "1762652580.0159202",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "arcee-ai/Arcee-Spark",
- "developer": "arcee-ai",
- "inference_platform": "unknown",
- "id": "arcee-ai/Arcee-Spark"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.571829412625168
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5480864114714127
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11404833836858004
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4007604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38131648936170215
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Spark/84a51879-cd67-449b-ace0-f87cccd6ea8c.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Spark/84a51879-cd67-449b-ace0-f87cccd6ea8c.json
deleted file mode 100644
index fc8bf883871e987625dc1d280204c4b152124d39..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Arcee-Spark/84a51879-cd67-449b-ace0-f87cccd6ea8c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/arcee-ai_Arcee-Spark/1762652580.015698",
- "retrieved_timestamp": "1762652580.015699",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "arcee-ai/Arcee-Spark",
- "developer": "arcee-ai",
- "inference_platform": "unknown",
- "id": "arcee-ai/Arcee-Spark"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5620874834328471
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5489474198567446
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29531722054380666
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40209374999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3822307180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_SuperNova-Medius/7e0e8ab9-a90b-4f0e-8e0a-eeceac12a4a1.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_SuperNova-Medius/7e0e8ab9-a90b-4f0e-8e0a-eeceac12a4a1.json
deleted file mode 100644
index b2de2bb6e19fd81ff2de24b3359ed31aecad0f95..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_SuperNova-Medius/7e0e8ab9-a90b-4f0e-8e0a-eeceac12a4a1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/arcee-ai_SuperNova-Medius/1762652580.016611",
- "retrieved_timestamp": "1762652580.016612",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "arcee-ai/SuperNova-Medius",
- "developer": "arcee-ai",
- "inference_platform": "unknown",
- "id": "arcee-ai/SuperNova-Medius"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7183584001560305
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6377284463115707
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4690332326283988
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33305369127516776
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4232708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5034906914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Virtuoso-Lite/62afba84-9929-4882-843e-3f7db7b030a3.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Virtuoso-Lite/62afba84-9929-4882-843e-3f7db7b030a3.json
deleted file mode 100644
index 3700230a541934936e8cb378702255986768bb10..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Virtuoso-Lite/62afba84-9929-4882-843e-3f7db7b030a3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/arcee-ai_Virtuoso-Lite/1762652580.0168262",
- "retrieved_timestamp": "1762652580.0168269",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "arcee-ai/Virtuoso-Lite",
- "developer": "arcee-ai",
- "inference_platform": "unknown",
- "id": "arcee-ai/Virtuoso-Lite"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8099575792231279
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6098520975127147
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25302114803625375
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34395973154362414
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4595416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4440658244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.306
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Virtuoso-Small-v2/325cf0a5-6a72-466a-8e1e-531f03db6083.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Virtuoso-Small-v2/325cf0a5-6a72-466a-8e1e-531f03db6083.json
deleted file mode 100644
index 31dfe246ef26e483e1cdcd6c4537d5b7281467cc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Virtuoso-Small-v2/325cf0a5-6a72-466a-8e1e-531f03db6083.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/arcee-ai_Virtuoso-Small-v2/1762652580.0172758",
- "retrieved_timestamp": "1762652580.017277",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "arcee-ai/Virtuoso-Small-v2",
- "developer": "arcee-ai",
- "inference_platform": "unknown",
- "id": "arcee-ai/Virtuoso-Small-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8273181824226385
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6554097094586643
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.466012084592145
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35318791946308725
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43133333333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.518783244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Virtuoso-Small/cc51c0e0-4e5d-496c-bf02-8b5d8f474cd3.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Virtuoso-Small/cc51c0e0-4e5d-496c-bf02-8b5d8f474cd3.json
deleted file mode 100644
index 0c1451effe3474baed14ebf90c499a051afc360e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_Virtuoso-Small/cc51c0e0-4e5d-496c-bf02-8b5d8f474cd3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/arcee-ai_Virtuoso-Small/1762652580.017056",
- "retrieved_timestamp": "1762652580.017057",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "arcee-ai/Virtuoso-Small",
- "developer": "arcee-ai",
- "inference_platform": "unknown",
- "id": "arcee-ai/Virtuoso-Small"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7935211904413622
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6517633129454784
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4093655589123867
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33640939597315433
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43390625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5191156914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_raspberry-3B/cef8c893-a903-4e30-b7e1-5f2fe8f2ac82.json b/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_raspberry-3B/cef8c893-a903-4e30-b7e1-5f2fe8f2ac82.json
deleted file mode 100644
index 395b28f0f819c47cb7e874bc19cac58f93c8ea4d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/arcee-ai/arcee-ai_raspberry-3B/cef8c893-a903-4e30-b7e1-5f2fe8f2ac82.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/arcee-ai_raspberry-3B/1762652580.017479",
- "retrieved_timestamp": "1762652580.017479",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "arcee-ai/raspberry-3B",
- "developer": "arcee-ai",
- "inference_platform": "unknown",
- "id": "arcee-ai/raspberry-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31541642840995227
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42689280188827033
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10347432024169184
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41232291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.285405585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/argilla/argilla_notus-7b-v1/c06f66ea-d9e3-4902-b3fd-188110f9c1e4.json b/leaderboard_data/HFOpenLLMv2/argilla/argilla_notus-7b-v1/c06f66ea-d9e3-4902-b3fd-188110f9c1e4.json
deleted file mode 100644
index d736355e2531573ffaa45e07dc0af56fdbb575f5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/argilla/argilla_notus-7b-v1/c06f66ea-d9e3-4902-b3fd-188110f9c1e4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/argilla_notus-7b-v1/1762652580.017684",
- "retrieved_timestamp": "1762652580.017685",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "argilla/notus-7b-v1",
- "developer": "argilla",
- "inference_platform": "unknown",
- "id": "argilla/notus-7b-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.508207112683236
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4511857407381495
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03172205438066465
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33641666666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3003656914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/argilla/argilla_notux-8x7b-v1/60185907-11c2-454c-bfbc-3c5741651ab7.json b/leaderboard_data/HFOpenLLMv2/argilla/argilla_notux-8x7b-v1/60185907-11c2-454c-bfbc-3c5741651ab7.json
deleted file mode 100644
index 72b15cf4e7c6bd1ade9554ed5c0fa54f4f683966..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/argilla/argilla_notux-8x7b-v1/60185907-11c2-454c-bfbc-3c5741651ab7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/argilla_notux-8x7b-v1/1762652580.017979",
- "retrieved_timestamp": "1762652580.0179799",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "argilla/notux-8x7b-v1",
- "developer": "argilla",
- "inference_platform": "unknown",
- "id": "argilla/notux-8x7b-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5422290633297429
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5363304164516353
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09969788519637462
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41759375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3660239361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 46.703
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/arisin/arisin_orca-platypus-13B-slerp/ecd45b21-21f7-49e2-b314-c7b678bdc8c1.json b/leaderboard_data/HFOpenLLMv2/arisin/arisin_orca-platypus-13B-slerp/ecd45b21-21f7-49e2-b314-c7b678bdc8c1.json
deleted file mode 100644
index 9d8d47f754c02f8ddd0e249db3231159297a8035..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/arisin/arisin_orca-platypus-13B-slerp/ecd45b21-21f7-49e2-b314-c7b678bdc8c1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/arisin_orca-platypus-13B-slerp/1762652580.018446",
- "retrieved_timestamp": "1762652580.018446",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "arisin/orca-platypus-13B-slerp",
- "developer": "arisin",
- "inference_platform": "unknown",
- "id": "arisin/orca-platypus-13B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26718107953563214
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46306234976954946
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015861027190332326
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4253125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2592253989361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 13.016
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/arshiaafshani/arshiaafshani_Arsh-V1/6f40503d-59ee-4cdc-a697-ef405d9644a7.json b/leaderboard_data/HFOpenLLMv2/arshiaafshani/arshiaafshani_Arsh-V1/6f40503d-59ee-4cdc-a697-ef405d9644a7.json
deleted file mode 100644
index 36e941a23c9f4ca6f6b22e78266fb20a788df8e5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/arshiaafshani/arshiaafshani_Arsh-V1/6f40503d-59ee-4cdc-a697-ef405d9644a7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/arshiaafshani_Arsh-V1/1762652580.0186949",
- "retrieved_timestamp": "1762652580.0186958",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "arshiaafshani/Arsh-V1",
- "developer": "arshiaafshani",
- "inference_platform": "unknown",
- "id": "arshiaafshani/Arsh-V1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6043276284702368
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6739657491720434
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2620845921450151
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3733221476510067
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48989583333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5256815159574468
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 13.96
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ashercn97/ashercn97_a1-v0.0.1/a9e3fe74-400c-444c-9b28-6f49c6671f96.json b/leaderboard_data/HFOpenLLMv2/ashercn97/ashercn97_a1-v0.0.1/a9e3fe74-400c-444c-9b28-6f49c6671f96.json
deleted file mode 100644
index 119362002823d32a4606d48ad69ab2c0f07aee4d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ashercn97/ashercn97_a1-v0.0.1/a9e3fe74-400c-444c-9b28-6f49c6671f96.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ashercn97_a1-v0.0.1/1762652580.019211",
- "retrieved_timestamp": "1762652580.019212",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ashercn97/a1-v0.0.1",
- "developer": "ashercn97",
- "inference_platform": "unknown",
- "id": "ashercn97/a1-v0.0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21984445715146922
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5188122863232913
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21450151057401812
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4119791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41647273936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ashercn97/ashercn97_a1-v002/509c2895-70ae-4381-94ef-f6cdf9ee07ef.json b/leaderboard_data/HFOpenLLMv2/ashercn97/ashercn97_a1-v002/509c2895-70ae-4381-94ef-f6cdf9ee07ef.json
deleted file mode 100644
index 38599401c114e084f0b3cff79b36fff9bc9a41c3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ashercn97/ashercn97_a1-v002/509c2895-70ae-4381-94ef-f6cdf9ee07ef.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ashercn97_a1-v002/1762652580.019455",
- "retrieved_timestamp": "1762652580.019456",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ashercn97/a1-v002",
- "developer": "ashercn97",
- "inference_platform": "unknown",
- "id": "ashercn97/a1-v002"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2584631001298776
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5261137844506322
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23413897280966767
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3187919463087248
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41591666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41747007978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/assskelad/assskelad_smollm2-360M-sft_SmallThoughts/ce2f5cc8-a187-454d-ba99-4446d29aab7c.json b/leaderboard_data/HFOpenLLMv2/assskelad/assskelad_smollm2-360M-sft_SmallThoughts/ce2f5cc8-a187-454d-ba99-4446d29aab7c.json
deleted file mode 100644
index 7b71bd5e9f94ccf6c07d6dd00ed67cd03166f187..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/assskelad/assskelad_smollm2-360M-sft_SmallThoughts/ce2f5cc8-a187-454d-ba99-4446d29aab7c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/assskelad_smollm2-360M-sft_SmallThoughts/1762652580.019667",
- "retrieved_timestamp": "1762652580.0196679",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "assskelad/smollm2-360M-sft_SmallThoughts",
- "developer": "assskelad",
- "inference_platform": "unknown",
- "id": "assskelad/smollm2-360M-sft_SmallThoughts"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20071078072846715
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3149572469619188
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01661631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3395208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11818484042553191
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.362
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/athirdpath/athirdpath_Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/9255090f-6862-4ff1-ac91-fe0cd7613445.json b/leaderboard_data/HFOpenLLMv2/athirdpath/athirdpath_Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/9255090f-6862-4ff1-ac91-fe0cd7613445.json
deleted file mode 100644
index 854c012b84c77e05c2d53ee0ca5e04891c6ac057..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/athirdpath/athirdpath_Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/9255090f-6862-4ff1-ac91-fe0cd7613445.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/athirdpath_Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit/1762652580.019914",
- "retrieved_timestamp": "1762652580.019914",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit",
- "developer": "athirdpath",
- "inference_platform": "unknown",
- "id": "athirdpath/Llama-3.1-Instruct_NSFW-pretrained_e1-plus_reddit"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4521037513796726
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4939066588253951
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10196374622356495
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3863958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3564660904255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/automerger/automerger_YamshadowExperiment28-7B/1fa5dee9-c360-40d9-8e67-9b415cd36616.json b/leaderboard_data/HFOpenLLMv2/automerger/automerger_YamshadowExperiment28-7B/1fa5dee9-c360-40d9-8e67-9b415cd36616.json
deleted file mode 100644
index 24a115bfe3d3784b36bd8b0e16576ca6e2ee9b67..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/automerger/automerger_YamshadowExperiment28-7B/1fa5dee9-c360-40d9-8e67-9b415cd36616.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/automerger_YamshadowExperiment28-7B/1762652580.020166",
- "retrieved_timestamp": "1762652580.0201669",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "automerger/YamshadowExperiment28-7B",
- "developer": "automerger",
- "inference_platform": "unknown",
- "id": "automerger/YamshadowExperiment28-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4070156074770498
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5150030227855061
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06117824773413897
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4306145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30601728723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/avemio/avemio_GRAG-NEMO-12B-ORPO-HESSIAN-AI/45cc7b31-3f75-42f7-9b07-3cf704fd2b55.json b/leaderboard_data/HFOpenLLMv2/avemio/avemio_GRAG-NEMO-12B-ORPO-HESSIAN-AI/45cc7b31-3f75-42f7-9b07-3cf704fd2b55.json
deleted file mode 100644
index bd276659edf858fe751cd06c17b236392b91bb2d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/avemio/avemio_GRAG-NEMO-12B-ORPO-HESSIAN-AI/45cc7b31-3f75-42f7-9b07-3cf704fd2b55.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/avemio_GRAG-NEMO-12B-ORPO-HESSIAN-AI/1762652580.020413",
- "retrieved_timestamp": "1762652580.0204139",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI",
- "developer": "avemio",
- "inference_platform": "unknown",
- "id": "avemio/GRAG-NEMO-12B-ORPO-HESSIAN-AI"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26065954545866094
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3446666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10605053191489362
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/baconnier/baconnier_Napoleon_24B_V0.0/88fb101e-35dd-40af-922f-9b66a2711249.json b/leaderboard_data/HFOpenLLMv2/baconnier/baconnier_Napoleon_24B_V0.0/88fb101e-35dd-40af-922f-9b66a2711249.json
deleted file mode 100644
index b655eb995dbc57cbc77b051891d479d69b8a90c0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/baconnier/baconnier_Napoleon_24B_V0.0/88fb101e-35dd-40af-922f-9b66a2711249.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/baconnier_Napoleon_24B_V0.0/1762652580.0222468",
- "retrieved_timestamp": "1762652580.022248",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "baconnier/Napoleon_24B_V0.0",
- "developer": "baconnier",
- "inference_platform": "unknown",
- "id": "baconnier/Napoleon_24B_V0.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1801021290176731
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6367110843973786
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22734138972809667
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37919463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4419895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5039893617021277
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 23.572
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/baconnier/baconnier_Napoleon_24B_V0.2/4857d2d0-1a4b-4544-8b1e-fb4b01618a3b.json b/leaderboard_data/HFOpenLLMv2/baconnier/baconnier_Napoleon_24B_V0.2/4857d2d0-1a4b-4544-8b1e-fb4b01618a3b.json
deleted file mode 100644
index 2de000b54abc1bb4419a6c7f3aea5add9ee37ed9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/baconnier/baconnier_Napoleon_24B_V0.2/4857d2d0-1a4b-4544-8b1e-fb4b01618a3b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/baconnier_Napoleon_24B_V0.2/1762652580.022489",
- "retrieved_timestamp": "1762652580.022489",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "baconnier/Napoleon_24B_V0.2",
- "developer": "baconnier",
- "inference_platform": "unknown",
- "id": "baconnier/Napoleon_24B_V0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2527172347150006
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5910621269874454
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14350453172205438
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4459583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4356715425531915
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 23.572
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/baebee/baebee_7B-Cetacea/5985fed7-9c54-458d-8f64-533e248a38da.json b/leaderboard_data/HFOpenLLMv2/baebee/baebee_7B-Cetacea/5985fed7-9c54-458d-8f64-533e248a38da.json
deleted file mode 100644
index b5fb9fefb563617455a6ad1fd1029140002463a6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/baebee/baebee_7B-Cetacea/5985fed7-9c54-458d-8f64-533e248a38da.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/baebee_7B-Cetacea/1762652580.022699",
- "retrieved_timestamp": "1762652580.022699",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "baebee/7B-Cetacea",
- "developer": "baebee",
- "inference_platform": "unknown",
- "id": "baebee/7B-Cetacea"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5278660620486975
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4757171853895546
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04682779456193353
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2860738255033557
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41362499999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2954621010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/baebee/baebee_mergekit-model_stock-nzjnheg/e847afb0-c8ac-4cce-b0f9-1667c9fbef3c.json b/leaderboard_data/HFOpenLLMv2/baebee/baebee_mergekit-model_stock-nzjnheg/e847afb0-c8ac-4cce-b0f9-1667c9fbef3c.json
deleted file mode 100644
index f360bc36ace9744296d5a1fd42d1e74f44ac3fa3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/baebee/baebee_mergekit-model_stock-nzjnheg/e847afb0-c8ac-4cce-b0f9-1667c9fbef3c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/baebee_mergekit-model_stock-nzjnheg/1762652580.022936",
- "retrieved_timestamp": "1762652580.022937",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "baebee/mergekit-model_stock-nzjnheg",
- "developer": "baebee",
- "inference_platform": "unknown",
- "id": "baebee/mergekit-model_stock-nzjnheg"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48442687624392167
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5287391310729729
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16767371601208458
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38466666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3699301861702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/baebee/baebee_mergekit-ties-fnjenli/21b3d7d0-301d-431d-9cfc-a0ad1e326f03.json b/leaderboard_data/HFOpenLLMv2/baebee/baebee_mergekit-ties-fnjenli/21b3d7d0-301d-431d-9cfc-a0ad1e326f03.json
deleted file mode 100644
index 8b09397de01e1f4499888101cc442c0d0a223b2a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/baebee/baebee_mergekit-ties-fnjenli/21b3d7d0-301d-431d-9cfc-a0ad1e326f03.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/baebee_mergekit-ties-fnjenli/1762652580.0231512",
- "retrieved_timestamp": "1762652580.023152",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "baebee/mergekit-ties-fnjenli",
- "developer": "baebee",
- "inference_platform": "unknown",
- "id": "baebee/mergekit-ties-fnjenli"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19881248420856662
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30236959112076134
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.002265861027190332
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24496644295302014
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4019375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11286569148936171
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.1v/ae256440-486f-43cf-b4a3-8d5c0ff196c9.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.1v/ae256440-486f-43cf-b4a3-8d5c0ff196c9.json
deleted file mode 100644
index 4e2f8cb3827c7a54a9e7e2ea15d555f4a8f36850..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.1v/ae256440-486f-43cf-b4a3-8d5c0ff196c9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.1v/1762652580.023659",
- "retrieved_timestamp": "1762652580.023659",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.1v",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.1v"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36362628935668473
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5436022524587655
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13293051359516617
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41315624999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3673537234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.2v/d509b0d3-a043-4057-bf80-37ec5ceedeed.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.2v/d509b0d3-a043-4057-bf80-37ec5ceedeed.json
deleted file mode 100644
index 324dfa648d7fe80bf583fd3329b8018391a79b3a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.2v/d509b0d3-a043-4057-bf80-37ec5ceedeed.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.2v/1762652580.023869",
- "retrieved_timestamp": "1762652580.02387",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.2v",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.2v"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3623773809048879
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5434355857920987
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12613293051359517
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32550335570469796
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41582291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36627327127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.3v/8e2e1f2f-4715-4b8b-b641-d5e552500408.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.3v/8e2e1f2f-4715-4b8b-b641-d5e552500408.json
deleted file mode 100644
index dd61b416c2631612833980d04700d071bc7cbebe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.3v/8e2e1f2f-4715-4b8b-b641-d5e552500408.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.3v/1762652580.02432",
- "retrieved_timestamp": "1762652580.024322",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.3v",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.3v"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38698209639312575
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5431389316665282
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1336858006042296
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41312499999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3663563829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.4v/4072cc72-b6b4-4a5d-8f01-f9f8437ea569.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.4v/4072cc72-b6b4-4a5d-8f01-f9f8437ea569.json
deleted file mode 100644
index be9a70e8728b3885dc322bc337068842bd710221..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.4v/4072cc72-b6b4-4a5d-8f01-f9f8437ea569.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.4v/1762652580.024673",
- "retrieved_timestamp": "1762652580.024674",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.4v",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.4v"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6508142838778884
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5094241395384186
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1351963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31711409395973156
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41762499999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36826795212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.5v/fa2e9cff-4a7b-4efd-98ca-b8fd2cb33928.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.5v/fa2e9cff-4a7b-4efd-98ca-b8fd2cb33928.json
deleted file mode 100644
index fdc5ac35e2b2c88da6248c38f6a780384bc9f642..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.5v/fa2e9cff-4a7b-4efd-98ca-b8fd2cb33928.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.5v/1762652580.0249128",
- "retrieved_timestamp": "1762652580.024914",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.5v",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.5v"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3745672593163916
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5421932988679541
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13670694864048338
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41315624999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36610704787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.6v/a58c4863-e5a9-425d-ad3e-5924d6146718.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.6v/a58c4863-e5a9-425d-ad3e-5924d6146718.json
deleted file mode 100644
index 034b074cbaa9fb20b1328f13d34b78389e956a86..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_0.6v/a58c4863-e5a9-425d-ad3e-5924d6146718.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_0.6v/1762652580.025138",
- "retrieved_timestamp": "1762652580.0251389",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/MISCHIEVOUS-12B-Mix_0.6v",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/MISCHIEVOUS-12B-Mix_0.6v"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43656608908806416
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5448909065942131
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12537764350453173
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4184895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3661901595744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_III_IV_V/c2e334b3-e82d-40bb-a6ed-9a941bf2352a.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_III_IV_V/c2e334b3-e82d-40bb-a6ed-9a941bf2352a.json
deleted file mode 100644
index e2ff6f620ab8e77e5bec3d987bce06f6905b1899..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_III_IV_V/c2e334b3-e82d-40bb-a6ed-9a941bf2352a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_III_IV_V/1762652580.0253649",
- "retrieved_timestamp": "1762652580.025366",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/MISCHIEVOUS-12B-Mix_III_IV_V"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40309379114083965
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.54645347832278
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12915407854984895
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41982291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3664394946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_III_ex_V/6f31292a-b09f-4e2c-ae3c-b093c5ba06c6.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_III_ex_V/6f31292a-b09f-4e2c-ae3c-b093c5ba06c6.json
deleted file mode 100644
index e9ffa7b59a42a25c4a8cbab4ebfa49d496799d9f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_III_ex_V/6f31292a-b09f-4e2c-ae3c-b093c5ba06c6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_III_ex_V/1762652580.025593",
- "retrieved_timestamp": "1762652580.025593",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/MISCHIEVOUS-12B-Mix_III_ex_V"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43162032296528763
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5448926891254073
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13217522658610273
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4197916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3648603723404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_Neo/089a5215-70a4-4255-ac01-1b70d4e8a494.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_Neo/089a5215-70a4-4255-ac01-1b70d4e8a494.json
deleted file mode 100644
index 1b433a24f6efc5e6b103aaacd746387997df2070..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B-Mix_Neo/089a5215-70a4-4255-ac01-1b70d4e8a494.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B-Mix_Neo/1762652580.0258071",
- "retrieved_timestamp": "1762652580.0258079",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/MISCHIEVOUS-12B-Mix_Neo",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/MISCHIEVOUS-12B-Mix_Neo"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6249606599378538
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5077574728717519
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13293051359516617
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41502083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36851728723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B/49ec948c-c06d-4c01-be83-9f74ed15ea17.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B/49ec948c-c06d-4c01-be83-9f74ed15ea17.json
deleted file mode 100644
index 47ad61c32c69c77657a951c6b5576e58158e225c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_MISCHIEVOUS-12B/49ec948c-c06d-4c01-be83-9f74ed15ea17.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_MISCHIEVOUS-12B/1762652580.02337",
- "retrieved_timestamp": "1762652580.02337",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/MISCHIEVOUS-12B",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/MISCHIEVOUS-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3851835352420466
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5404981575206657
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12764350453172205
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4144895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3671875
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_NameLess-12B-prob/81670e41-16d6-43a6-9af9-6924a52a8300.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_NameLess-12B-prob/81670e41-16d6-43a6-9af9-6924a52a8300.json
deleted file mode 100644
index 848eb9c3e29af398a749ed46784cbbed7c775649..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_NameLess-12B-prob/81670e41-16d6-43a6-9af9-6924a52a8300.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_NameLess-12B-prob/1762652580.026292",
- "retrieved_timestamp": "1762652580.026293",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/NameLess-12B-prob",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/NameLess-12B-prob"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6602315190361574
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5158141019151304
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12613293051359517
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145973154362416
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.433625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3684341755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-0.1v/2d468a71-7364-40eb-8a98-1dbac956b3cf.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-0.1v/2d468a71-7364-40eb-8a98-1dbac956b3cf.json
deleted file mode 100644
index 09777d04a12f8880039a09148107ce6c88568300..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-0.1v/2d468a71-7364-40eb-8a98-1dbac956b3cf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-0.1v/1762652580.026718",
- "retrieved_timestamp": "1762652580.026719",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/VICIOUS_MESH-12B-0.1v",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/VICIOUS_MESH-12B-0.1v"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36574954454181574
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5412276004529172
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13217522658610273
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41582291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36826795212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 6.124
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-0.X.ver/d0c92f20-72d0-431c-b8ba-881b3a6ae158.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-0.X.ver/d0c92f20-72d0-431c-b8ba-881b3a6ae158.json
deleted file mode 100644
index 061f431bc4df59424ec0058099fcfb85762fb214..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-0.X.ver/d0c92f20-72d0-431c-b8ba-881b3a6ae158.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-0.X.ver/1762652580.0269299",
- "retrieved_timestamp": "1762652580.0269299",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/VICIOUS_MESH-12B-0.X.ver",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/VICIOUS_MESH-12B-0.X.ver"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37756486123485683
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.541624689936422
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12009063444108761
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41982291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36710438829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 6.124
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-ALPHA/0053cf6a-0e1e-49c5-8d0a-b3d7254e22f3.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-ALPHA/0053cf6a-0e1e-49c5-8d0a-b3d7254e22f3.json
deleted file mode 100644
index 9e4b4a2bef67c65005f8f09cfa57d5e50fed705e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-ALPHA/0053cf6a-0e1e-49c5-8d0a-b3d7254e22f3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-ALPHA/1762652580.0271401",
- "retrieved_timestamp": "1762652580.027141",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/VICIOUS_MESH-12B-ALPHA",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/VICIOUS_MESH-12B-ALPHA"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6365011502812536
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5093679898057982
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13670694864048338
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4202916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3696808510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-BETA/2f023511-2446-48f8-83e5-47225f15e905.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-BETA/2f023511-2446-48f8-83e5-47225f15e905.json
deleted file mode 100644
index ee84f26e18a1026e770a557c9524eca3b48ef938..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-BETA/2f023511-2446-48f8-83e5-47225f15e905.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-BETA/1762652580.0273511",
- "retrieved_timestamp": "1762652580.0273511",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/VICIOUS_MESH-12B-BETA",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/VICIOUS_MESH-12B-BETA"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6720967034136092
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5155964285724085
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13293051359516617
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4309895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36785239361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-DELTA/fcaf0de1-f4f5-4bfb-8276-29b3b1f5b5be.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-DELTA/fcaf0de1-f4f5-4bfb-8276-29b3b1f5b5be.json
deleted file mode 100644
index 7e48cf62590e0f1eccf3b4651871d5fddaf3d7f9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-DELTA/fcaf0de1-f4f5-4bfb-8276-29b3b1f5b5be.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-DELTA/1762652580.027563",
- "retrieved_timestamp": "1762652580.027563",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/VICIOUS_MESH-12B-DELTA",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/VICIOUS_MESH-12B-DELTA"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6468924675416783
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5055418480543742
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13746223564954682
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40565625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3651097074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 6.124
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-DIGAMMA/67e74757-9950-499e-9258-7ccd20b29835.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-DIGAMMA/67e74757-9950-499e-9258-7ccd20b29835.json
deleted file mode 100644
index fe5bcd44614e2ca868550692984e32e01b776ae4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-DIGAMMA/67e74757-9950-499e-9258-7ccd20b29835.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-DIGAMMA/1762652580.027769",
- "retrieved_timestamp": "1762652580.02777",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/VICIOUS_MESH-12B-DIGAMMA",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/VICIOUS_MESH-12B-DIGAMMA"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6429207835210575
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.506116784464076
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1336858006042296
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40965625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36585771276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 6.124
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-EPSILON/38864e75-9bb0-4eaa-ba87-c631838a9ad1.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-EPSILON/38864e75-9bb0-4eaa-ba87-c631838a9ad1.json
deleted file mode 100644
index f794c78907ab614f6e27a84e344f0df65b928134..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-EPSILON/38864e75-9bb0-4eaa-ba87-c631838a9ad1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-EPSILON/1762652580.0279832",
- "retrieved_timestamp": "1762652580.0279832",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/VICIOUS_MESH-12B-EPSILON",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/VICIOUS_MESH-12B-EPSILON"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6304560787599126
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5037995611302296
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12613293051359517
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145973154362416
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4069895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36477726063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 6.124
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-GAMMA/4507a6c1-bfff-4e8d-92c6-7e923f74c4dc.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-GAMMA/4507a6c1-bfff-4e8d-92c6-7e923f74c4dc.json
deleted file mode 100644
index bfd838bcd876fb93192d2b5b4f8f14e7fabc3bee..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-GAMMA/4507a6c1-bfff-4e8d-92c6-7e923f74c4dc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-GAMMA/1762652580.028181",
- "retrieved_timestamp": "1762652580.028182",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/VICIOUS_MESH-12B-GAMMA",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/VICIOUS_MESH-12B-GAMMA"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6361764562472019
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5181908355069679
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13066465256797583
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43632291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3666057180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-NEMO/6a9c649c-fbcd-489a-bc01-083014932a45.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-NEMO/6a9c649c-fbcd-489a-bc01-083014932a45.json
deleted file mode 100644
index 3cacdb9e98e2ecca792ac1a153b2d081f45b2ca4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-NEMO/6a9c649c-fbcd-489a-bc01-083014932a45.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-NEMO/1762652580.028384",
- "retrieved_timestamp": "1762652580.028385",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/VICIOUS_MESH-12B-NEMO",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/VICIOUS_MESH-12B-NEMO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40221944440750546
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5441680901949261
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1268882175226586
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42506249999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37159242021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-OMEGA/a630e843-ec9c-432b-986a-2b181c789507.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-OMEGA/a630e843-ec9c-432b-986a-2b181c789507.json
deleted file mode 100644
index a7c8298259ffb24946351bbc4b95c95148722d33..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-OMEGA/a630e843-ec9c-432b-986a-2b181c789507.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-OMEGA/1762652580.028594",
- "retrieved_timestamp": "1762652580.028594",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/VICIOUS_MESH-12B-OMEGA",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/VICIOUS_MESH-12B-OMEGA"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6699734482284783
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.516644373777888
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13444108761329304
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31543624161073824
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43232291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36768617021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-UNION/20d0e946-e7cf-48a6-a81e-f73d774e0e2b.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-UNION/20d0e946-e7cf-48a6-a81e-f73d774e0e2b.json
deleted file mode 100644
index 871c3c355b38e939d203db70f4b3da5f771797d1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B-UNION/20d0e946-e7cf-48a6-a81e-f73d774e0e2b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B-UNION/1762652580.028806",
- "retrieved_timestamp": "1762652580.028807",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/VICIOUS_MESH-12B-UNION",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/VICIOUS_MESH-12B-UNION"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6428709158366468
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5106643448765741
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13897280966767372
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4256875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3671875
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 6.124
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B/f2ef86c9-e968-42e0-a0d0-1cf79f9c249b.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B/f2ef86c9-e968-42e0-a0d0-1cf79f9c249b.json
deleted file mode 100644
index b840402a32532e60afbeefde252f03767b092858..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B/f2ef86c9-e968-42e0-a0d0-1cf79f9c249b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B/1762652580.026504",
- "retrieved_timestamp": "1762652580.026504",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/VICIOUS_MESH-12B",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/VICIOUS_MESH-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37156965739792636
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5436022524587655
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13444108761329304
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4104895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36785239361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 6.124
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B_Razor/950f6bff-e0ec-4556-85b7-81444008d1d4.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B_Razor/950f6bff-e0ec-4556-85b7-81444008d1d4.json
deleted file mode 100644
index 925cada3344cf6ab91cd862ccfae2693c90118a0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_VICIOUS_MESH-12B_Razor/950f6bff-e0ec-4556-85b7-81444008d1d4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_VICIOUS_MESH-12B_Razor/1762652580.029016",
- "retrieved_timestamp": "1762652580.029016",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/VICIOUS_MESH-12B_Razor",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/VICIOUS_MESH-12B_Razor"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37364304489864675
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5447127693928118
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1299093655589124
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32298657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40915624999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36685505319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 6.124
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_mergekit-model_stock-zdaysvi/8932da66-d29a-4453-9b61-bee48f1a28f1.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_mergekit-model_stock-zdaysvi/8932da66-d29a-4453-9b61-bee48f1a28f1.json
deleted file mode 100644
index dabbce7beb54f9d8a0164653cc3e555cd888e2ab..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_mergekit-model_stock-zdaysvi/8932da66-d29a-4453-9b61-bee48f1a28f1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_mergekit-model_stock-zdaysvi/1762652580.029272",
- "retrieved_timestamp": "1762652580.029272",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/mergekit-model_stock-zdaysvi",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/mergekit-model_stock-zdaysvi"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6425960894870055
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5062803896601668
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1351963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41238541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36884973404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 6.124
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_mergekit-ties-sinbkow/b8c00b3b-c35a-4511-965b-6096e9b116de.json b/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_mergekit-ties-sinbkow/b8c00b3b-c35a-4511-965b-6096e9b116de.json
deleted file mode 100644
index f8d280c9ca95a85101d207871c841af2aac946f8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bamec66557/bamec66557_mergekit-ties-sinbkow/b8c00b3b-c35a-4511-965b-6096e9b116de.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bamec66557_mergekit-ties-sinbkow/1762652580.029482",
- "retrieved_timestamp": "1762652580.029482",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bamec66557/mergekit-ties-sinbkow",
- "developer": "bamec66557",
- "inference_platform": "unknown",
- "id": "bamec66557/mergekit-ties-sinbkow"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6431956098706986
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5092084289828543
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14501510574018128
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40447916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36028922872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 6.124
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/belztjti/belztjti_dffghgjh/82b3c9ac-16bb-4fd0-8bed-af1ac598a424.json b/leaderboard_data/HFOpenLLMv2/belztjti/belztjti_dffghgjh/82b3c9ac-16bb-4fd0-8bed-af1ac598a424.json
deleted file mode 100644
index 947c8cb4fe4d45109dd7ef3c7fccb63b0e91d46a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/belztjti/belztjti_dffghgjh/82b3c9ac-16bb-4fd0-8bed-af1ac598a424.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/belztjti_dffghgjh/1762652580.0296938",
- "retrieved_timestamp": "1762652580.029695",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "belztjti/dffghgjh",
- "developer": "belztjti",
- "inference_platform": "unknown",
- "id": "belztjti/dffghgjh"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5784241368457914
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35817085768640783
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.023413897280966767
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34745833333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3421708776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GlmForCausalLM",
- "params_billions": 9.543
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/belztjti/belztjti_dtfgv/655ea5ea-d94a-43eb-a4bf-182fd021d65a.json b/leaderboard_data/HFOpenLLMv2/belztjti/belztjti_dtfgv/655ea5ea-d94a-43eb-a4bf-182fd021d65a.json
deleted file mode 100644
index a3f77aecf84f01bb52d7143f69b56d7ac65be339..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/belztjti/belztjti_dtfgv/655ea5ea-d94a-43eb-a4bf-182fd021d65a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/belztjti_dtfgv/1762652580.029931",
- "retrieved_timestamp": "1762652580.029932",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "belztjti/dtfgv",
- "developer": "belztjti",
- "inference_platform": "unknown",
- "id": "belztjti/dtfgv"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.334450369464133
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32815316667476035
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01812688821752266
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3793958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15043218085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 9.543
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/beowolx/beowolx_CodeNinja-1.0-OpenChat-7B/fbe7d86c-8d1e-474a-bf85-35a139bdb08f.json b/leaderboard_data/HFOpenLLMv2/beowolx/beowolx_CodeNinja-1.0-OpenChat-7B/fbe7d86c-8d1e-474a-bf85-35a139bdb08f.json
deleted file mode 100644
index bf43f33ec374f08f3069c74723ad7908fc00d514..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/beowolx/beowolx_CodeNinja-1.0-OpenChat-7B/fbe7d86c-8d1e-474a-bf85-35a139bdb08f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/beowolx_CodeNinja-1.0-OpenChat-7B/1762652580.030703",
- "retrieved_timestamp": "1762652580.030704",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "beowolx/CodeNinja-1.0-OpenChat-7B",
- "developer": "beowolx",
- "inference_platform": "unknown",
- "id": "beowolx/CodeNinja-1.0-OpenChat-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5446770125489258
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4441338669403703
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06722054380664652
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42432291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3015292553191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/berkeley-nest/berkeley-nest_Starling-LM-7B-alpha/ddc116b6-5b9a-409f-a0ab-09e5630d1289.json b/leaderboard_data/HFOpenLLMv2/berkeley-nest/berkeley-nest_Starling-LM-7B-alpha/ddc116b6-5b9a-409f-a0ab-09e5630d1289.json
deleted file mode 100644
index b266e095dc6c94423fdb47753012c51c9f88124c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/berkeley-nest/berkeley-nest_Starling-LM-7B-alpha/ddc116b6-5b9a-409f-a0ab-09e5630d1289.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/berkeley-nest_Starling-LM-7B-alpha/1762652580.030957",
- "retrieved_timestamp": "1762652580.0309582",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "berkeley-nest/Starling-LM-7B-alpha",
- "developer": "berkeley-nest",
- "inference_platform": "unknown",
- "id": "berkeley-nest/Starling-LM-7B-alpha"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5480491761858536
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4440065261164004
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08383685800604229
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41201041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3171542553191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_Gunny/e7d0c3d5-d962-49b5-a4b7-3cb7ac12735c.json b/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_Gunny/e7d0c3d5-d962-49b5-a4b7-3cb7ac12735c.json
deleted file mode 100644
index da4dce67dcba7fada15104f000154f600b870ec4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_Gunny/e7d0c3d5-d962-49b5-a4b7-3cb7ac12735c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bfuzzy1_Gunny/1762652580.031208",
- "retrieved_timestamp": "1762652580.031209",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bfuzzy1/Gunny",
- "developer": "bfuzzy1",
- "inference_platform": "unknown",
- "id": "bfuzzy1/Gunny"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7128629813339716
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45459857092962414
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1729607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35828124999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3038563829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron-c/71268c77-565a-401b-a51d-122060ed5945.json b/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron-c/71268c77-565a-401b-a51d-122060ed5945.json
deleted file mode 100644
index 91c18cdd9b2d806418648abc034977d8d120c307..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron-c/71268c77-565a-401b-a51d-122060ed5945.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-c/1762652580.031654",
- "retrieved_timestamp": "1762652580.0316548",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bfuzzy1/acheron-c",
- "developer": "bfuzzy1",
- "inference_platform": "unknown",
- "id": "bfuzzy1/acheron-c"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19286714805604685
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30260703404313577
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0030211480362537764
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24748322147651006
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33821875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1171875
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.514
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron-d/1c9ba45f-1f3b-42ad-a603-ea7039fee22e.json b/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron-d/1c9ba45f-1f3b-42ad-a603-ea7039fee22e.json
deleted file mode 100644
index b7f366fb073ae982ed959790ac557ec80c5942e4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron-d/1c9ba45f-1f3b-42ad-a603-ea7039fee22e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-d/1762652580.031856",
- "retrieved_timestamp": "1762652580.031857",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bfuzzy1/acheron-d",
- "developer": "bfuzzy1",
- "inference_platform": "unknown",
- "id": "bfuzzy1/acheron-d"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.192542454021995
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3139959864926003
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015105740181268883
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23657718120805368
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34971875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11344747340425532
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.514
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron-m/fdd707f8-df0b-4384-bc77-35f3fa8ec0a0.json b/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron-m/fdd707f8-df0b-4384-bc77-35f3fa8ec0a0.json
deleted file mode 100644
index 30d3673028a36e9bdfc49951020317614a40ef6f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron-m/fdd707f8-df0b-4384-bc77-35f3fa8ec0a0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron-m/1762652580.032056",
- "retrieved_timestamp": "1762652580.032057",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bfuzzy1/acheron-m",
- "developer": "bfuzzy1",
- "inference_platform": "unknown",
- "id": "bfuzzy1/acheron-m"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17583123889058808
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29284447696551025
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.00906344410876133
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3486666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11128656914893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.514
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron/2b74949a-c0a3-4061-8cf4-4330850af288.json b/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron/2b74949a-c0a3-4061-8cf4-4330850af288.json
deleted file mode 100644
index 97a3e12584fb681975e2710b1efd21c76d0497f1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_acheron/2b74949a-c0a3-4061-8cf4-4330850af288.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bfuzzy1_acheron/1762652580.031447",
- "retrieved_timestamp": "1762652580.031447",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bfuzzy1/acheron",
- "developer": "bfuzzy1",
- "inference_platform": "unknown",
- "id": "bfuzzy1/acheron"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19831269919369493
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3107918622526179
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01661631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23909395973154363
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3510520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10962433510638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.514
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_llambses-1/3f04797b-fe6d-4cd5-a49e-b898a8db26a6.json b/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_llambses-1/3f04797b-fe6d-4cd5-a49e-b898a8db26a6.json
deleted file mode 100644
index b55af96a5aa0b7cfa7f4f8c9eb072a383617082c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bfuzzy1/bfuzzy1_llambses-1/3f04797b-fe6d-4cd5-a49e-b898a8db26a6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bfuzzy1_llambses-1/1762652580.032492",
- "retrieved_timestamp": "1762652580.032493",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bfuzzy1/llambses-1",
- "developer": "bfuzzy1",
- "inference_platform": "unknown",
- "id": "bfuzzy1/llambses-1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3553837152089788
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5046977405175623
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06873111782477341
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45290625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31399601063829785
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bhuvneshsaini/bhuvneshsaini_merged_model/44e6cddd-4ecc-499f-a6b7-d8ee0640c2f9.json b/leaderboard_data/HFOpenLLMv2/bhuvneshsaini/bhuvneshsaini_merged_model/44e6cddd-4ecc-499f-a6b7-d8ee0640c2f9.json
deleted file mode 100644
index 362361b1603ef0a5b6e52245f565866f33a20b35..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bhuvneshsaini/bhuvneshsaini_merged_model/44e6cddd-4ecc-499f-a6b7-d8ee0640c2f9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bhuvneshsaini_merged_model/1762652580.032705",
- "retrieved_timestamp": "1762652580.032706",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bhuvneshsaini/merged_model",
- "developer": "bhuvneshsaini",
- "inference_platform": "unknown",
- "id": "bhuvneshsaini/merged_model"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1812767900282362
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3359777949071243
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34971875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14453125
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 4.715
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bigcode/bigcode_starcoder2-15b/09aa04cf-9369-453f-952a-2f6c74e4707a.json b/leaderboard_data/HFOpenLLMv2/bigcode/bigcode_starcoder2-15b/09aa04cf-9369-453f-952a-2f6c74e4707a.json
deleted file mode 100644
index 2b9fad2c1347015b758cc95a413345795ef72187..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bigcode/bigcode_starcoder2-15b/09aa04cf-9369-453f-952a-2f6c74e4707a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bigcode_starcoder2-15b/1762652580.032956",
- "retrieved_timestamp": "1762652580.0329568",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bigcode/starcoder2-15b",
- "developer": "bigcode",
- "inference_platform": "unknown",
- "id": "bigcode/starcoder2-15b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2780223141265177
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4447957841230437
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05966767371601209
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35009375000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23528922872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Starcoder2ForCausalLM",
- "params_billions": 15.958
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bigcode/bigcode_starcoder2-3b/7385c595-5b4f-4491-8e71-ece57ffffbd2.json b/leaderboard_data/HFOpenLLMv2/bigcode/bigcode_starcoder2-3b/7385c595-5b4f-4491-8e71-ece57ffffbd2.json
deleted file mode 100644
index 0d2c8119a5f78c1f08d602deb4a0a92e13283a92..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bigcode/bigcode_starcoder2-3b/7385c595-5b4f-4491-8e71-ece57ffffbd2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bigcode_starcoder2-3b/1762652580.0331972",
- "retrieved_timestamp": "1762652580.0331972",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bigcode/starcoder2-3b",
- "developer": "bigcode",
- "inference_platform": "unknown",
- "id": "bigcode/starcoder2-3b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20370838264693236
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35087141384601755
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015105740181268883
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24412751677852348
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34345833333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1636469414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Starcoder2ForCausalLM",
- "params_billions": 3.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bigcode/bigcode_starcoder2-7b/53eac61a-064e-4786-bc94-962382d88f77.json b/leaderboard_data/HFOpenLLMv2/bigcode/bigcode_starcoder2-7b/53eac61a-064e-4786-bc94-962382d88f77.json
deleted file mode 100644
index abce2d86e976143cade702558d38febc36048dcc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bigcode/bigcode_starcoder2-7b/53eac61a-064e-4786-bc94-962382d88f77.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bigcode_starcoder2-7b/1762652580.0333922",
- "retrieved_timestamp": "1762652580.0333922",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bigcode/starcoder2-7b",
- "developer": "bigcode",
- "inference_platform": "unknown",
- "id": "bigcode/starcoder2-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22091938279321088
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36609857669123036
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.030966767371601207
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2516778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3793333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16422872340425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Starcoder2ForCausalLM",
- "params_billions": 7.174
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-1b1/284ba4fb-cae4-46ac-a5dd-a36fb145da55.json b/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-1b1/284ba4fb-cae4-46ac-a5dd-a36fb145da55.json
deleted file mode 100644
index c109d586bff04818d1a4e4e805f2fd925e2bf18f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-1b1/284ba4fb-cae4-46ac-a5dd-a36fb145da55.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bigscience_bloom-1b1/1762652580.033589",
- "retrieved_timestamp": "1762652580.033589",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bigscience/bloom-1b1",
- "developer": "bigscience",
- "inference_platform": "unknown",
- "id": "bigscience/bloom-1b1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13733781920858879
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31072762377370394
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.005287009063444109
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36999999999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1107878989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "BloomForCausalLM",
- "params_billions": 1.065
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-1b7/8adb8bb9-d057-45df-827a-cd8f014b4ff6.json b/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-1b7/8adb8bb9-d057-45df-827a-cd8f014b4ff6.json
deleted file mode 100644
index accb79738af8ec8a5423a5aa52ec843bd108a8ae..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-1b7/8adb8bb9-d057-45df-827a-cd8f014b4ff6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bigscience_bloom-1b7/1762652580.033839",
- "retrieved_timestamp": "1762652580.033839",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bigscience/bloom-1b7",
- "developer": "bigscience",
- "inference_platform": "unknown",
- "id": "bigscience/bloom-1b7"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10438968603305895
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.314054919904072
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.005287009063444109
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38857291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10862699468085106
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "BloomForCausalLM",
- "params_billions": 1.722
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-3b/88f90805-7410-4ec1-ad19-8e8a146f1ba3.json b/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-3b/88f90805-7410-4ec1-ad19-8e8a146f1ba3.json
deleted file mode 100644
index 818bbd49b9bd72c378dc892375a866de4609bc93..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-3b/88f90805-7410-4ec1-ad19-8e8a146f1ba3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bigscience_bloom-3b/1762652580.034177",
- "retrieved_timestamp": "1762652580.034179",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bigscience/bloom-3b",
- "developer": "bigscience",
- "inference_platform": "unknown",
- "id": "bigscience/bloom-3b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1270961050013963
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062918592346337
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.008308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23993288590604026
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3980625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11328125
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "BloomForCausalLM",
- "params_billions": 3.003
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-560m/82454b92-cca1-4ac8-a620-e1a8487a5b8e.json b/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-560m/82454b92-cca1-4ac8-a620-e1a8487a5b8e.json
deleted file mode 100644
index 85e4c8e5f5ef88b44239fb279f482eec8be2b5c3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-560m/82454b92-cca1-4ac8-a620-e1a8487a5b8e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bigscience_bloom-560m/1762652580.034546",
- "retrieved_timestamp": "1762652580.034548",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bigscience/bloom-560m",
- "developer": "bigscience",
- "inference_platform": "unknown",
- "id": "bigscience/bloom-560m"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06202431769926019
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3025950541549823
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0037764350453172208
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4030833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11643949468085106
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "BloomForCausalLM",
- "params_billions": 0.559
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-7b1/d5fe1452-b6ee-4f1d-9eca-713b49a6a941.json b/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-7b1/d5fe1452-b6ee-4f1d-9eca-713b49a6a941.json
deleted file mode 100644
index b5965fbbe8126f51387a9c592a53137655e38b45..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bigscience/bigscience_bloom-7b1/d5fe1452-b6ee-4f1d-9eca-713b49a6a941.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bigscience_bloom-7b1/1762652580.0348449",
- "retrieved_timestamp": "1762652580.034846",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bigscience/bloom-7b1",
- "developer": "bigscience",
- "inference_platform": "unknown",
- "id": "bigscience/bloom-7b1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13221696210499254
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3113718529627139
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.005287009063444109
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34869791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11045545212765957
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "BloomForCausalLM",
- "params_billions": 7.069
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bluuwhale/bluuwhale_L3-SthenoMaid-8B-V1/44dd13bc-56f0-4dd1-90d0-bb411239109a.json b/leaderboard_data/HFOpenLLMv2/bluuwhale/bluuwhale_L3-SthenoMaid-8B-V1/44dd13bc-56f0-4dd1-90d0-bb411239109a.json
deleted file mode 100644
index fe03a39b7d97cee9cdf9088f25bbccdb76cb3f0a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bluuwhale/bluuwhale_L3-SthenoMaid-8B-V1/44dd13bc-56f0-4dd1-90d0-bb411239109a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bluuwhale_L3-SthenoMaid-8B-V1/1762652580.035146",
- "retrieved_timestamp": "1762652580.035147",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bluuwhale/L3-SthenoMaid-8B-V1",
- "developer": "bluuwhale",
- "inference_platform": "unknown",
- "id": "bluuwhale/L3-SthenoMaid-8B-V1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7344700949037443
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5218759253208048
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10800604229607251
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3686979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3656083776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bond005/bond005_meno-tiny-0.1/109acb38-3026-4573-b082-8277b9501f09.json b/leaderboard_data/HFOpenLLMv2/bond005/bond005_meno-tiny-0.1/109acb38-3026-4573-b082-8277b9501f09.json
deleted file mode 100644
index 4969a9b605716060c126c1123dca01f50fcf17c8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bond005/bond005_meno-tiny-0.1/109acb38-3026-4573-b082-8277b9501f09.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bond005_meno-tiny-0.1/1762652580.035417",
- "retrieved_timestamp": "1762652580.035417",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bond005/meno-tiny-0.1",
- "developer": "bond005",
- "inference_platform": "unknown",
- "id": "bond005/meno-tiny-0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45497613000172876
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4262909130965971
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13897280966767372
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28187919463087246
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4184583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785904255319149
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/braindao/braindao_Qwen2.5-14B-Instruct/cb442f90-a0e1-4588-900c-548b994a764d.json b/leaderboard_data/HFOpenLLMv2/braindao/braindao_Qwen2.5-14B-Instruct/cb442f90-a0e1-4588-900c-548b994a764d.json
deleted file mode 100644
index 83f5020377576b246cc980357df28b5c9a742153..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/braindao/braindao_Qwen2.5-14B-Instruct/cb442f90-a0e1-4588-900c-548b994a764d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/braindao_Qwen2.5-14B-Instruct/1762652580.040103",
- "retrieved_timestamp": "1762652580.040104",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "braindao/Qwen2.5-14B-Instruct",
- "developer": "braindao",
- "inference_platform": "unknown",
- "id": "braindao/Qwen2.5-14B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8142539572778007
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6403640774008682
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.552870090634441
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3288590604026846
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.414
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48894614361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/braindao/braindao_iq-code-evmind-0.5b/58f1b3d7-74a6-4ed0-b927-afaedfdda25f.json b/leaderboard_data/HFOpenLLMv2/braindao/braindao_iq-code-evmind-0.5b/58f1b3d7-74a6-4ed0-b927-afaedfdda25f.json
deleted file mode 100644
index 9d8ebfdce44f3d43a65b459a5e2e003dfe188565..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/braindao/braindao_iq-code-evmind-0.5b/58f1b3d7-74a6-4ed0-b927-afaedfdda25f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/braindao_iq-code-evmind-0.5b/1762652580.0403671",
- "retrieved_timestamp": "1762652580.040368",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "braindao/iq-code-evmind-0.5b",
- "developer": "braindao",
- "inference_platform": "unknown",
- "id": "braindao/iq-code-evmind-0.5b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3215612353001148
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31637440507987097
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02416918429003021
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24161073825503357
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33037500000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11893284574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Bgeneral-ECE-PRYMMAL-Martial/06d2ac1d-d70c-4cda-997d-9d4d1ef50c5a.json b/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Bgeneral-ECE-PRYMMAL-Martial/06d2ac1d-d70c-4cda-997d-9d4d1ef50c5a.json
deleted file mode 100644
index 3f85e2a869e1a5d42f075463520bf92c0751e520..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Bgeneral-ECE-PRYMMAL-Martial/06d2ac1d-d70c-4cda-997d-9d4d1ef50c5a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/brgx53_3Bgeneral-ECE-PRYMMAL-Martial/1762652580.040573",
- "retrieved_timestamp": "1762652580.0405738",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "brgx53/3Bgeneral-ECE-PRYMMAL-Martial",
- "developer": "brgx53",
- "inference_platform": "unknown",
- "id": "brgx53/3Bgeneral-ECE-PRYMMAL-Martial"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32893057088525113
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5458008312900208
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13141993957703926
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43728125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3933676861702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 3.821
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Bgeneralv2-ECE-PRYMMAL-Martial/c7f6603c-dcca-49b9-94bd-0a1fbf707dd9.json b/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Bgeneralv2-ECE-PRYMMAL-Martial/c7f6603c-dcca-49b9-94bd-0a1fbf707dd9.json
deleted file mode 100644
index 46cb1318a2db83054d129e2aa85b7450ba954dc0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Bgeneralv2-ECE-PRYMMAL-Martial/c7f6603c-dcca-49b9-94bd-0a1fbf707dd9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/brgx53_3Bgeneralv2-ECE-PRYMMAL-Martial/1762652580.040823",
- "retrieved_timestamp": "1762652580.0408242",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial",
- "developer": "brgx53",
- "inference_platform": "unknown",
- "id": "brgx53/3Bgeneralv2-ECE-PRYMMAL-Martial"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.567708125551315
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5607195549186694
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3496978851963746
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43563541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45054853723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Blareneg-ECE-PRYMMAL-Martial/6fea29aa-174f-4e3f-be91-c79842126c2c.json b/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Blareneg-ECE-PRYMMAL-Martial/6fea29aa-174f-4e3f-be91-c79842126c2c.json
deleted file mode 100644
index 5b5056a7bf7d28be40f2357f44c0943e6cdd8ff7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Blareneg-ECE-PRYMMAL-Martial/6fea29aa-174f-4e3f-be91-c79842126c2c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/brgx53_3Blareneg-ECE-PRYMMAL-Martial/1762652580.041033",
- "retrieved_timestamp": "1762652580.041034",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "brgx53/3Blareneg-ECE-PRYMMAL-Martial",
- "developer": "brgx53",
- "inference_platform": "unknown",
- "id": "brgx53/3Blareneg-ECE-PRYMMAL-Martial"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28763902002242936
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.535846215598753
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3347315436241611
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4428958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4015957446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 3.821
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Blarenegv2-ECE-PRYMMAL-Martial/64e92286-72ea-4318-aaea-4e0be87a0067.json b/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Blarenegv2-ECE-PRYMMAL-Martial/64e92286-72ea-4318-aaea-4e0be87a0067.json
deleted file mode 100644
index eb2ce0e5d10b9f60571ecb548aa19bcd5167ddfa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_3Blarenegv2-ECE-PRYMMAL-Martial/64e92286-72ea-4318-aaea-4e0be87a0067.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/brgx53_3Blarenegv2-ECE-PRYMMAL-Martial/1762652580.04124",
- "retrieved_timestamp": "1762652580.04124",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "brgx53/3Blarenegv2-ECE-PRYMMAL-Martial",
- "developer": "brgx53",
- "inference_platform": "unknown",
- "id": "brgx53/3Blarenegv2-ECE-PRYMMAL-Martial"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5661843907498769
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5607195549186694
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3496978851963746
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43563541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45054853723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_Barracuda-PRYMMAL-ECE-TW3/70a11b76-f8e4-4cfb-8ab6-791c7e9ba113.json b/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_Barracuda-PRYMMAL-ECE-TW3/70a11b76-f8e4-4cfb-8ab6-791c7e9ba113.json
deleted file mode 100644
index 38954e57766daae333d7a9e78656375f5746356a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_Barracuda-PRYMMAL-ECE-TW3/70a11b76-f8e4-4cfb-8ab6-791c7e9ba113.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/brgx53_Barracuda-PRYMMAL-ECE-TW3/1762652580.041505",
- "retrieved_timestamp": "1762652580.041506",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "brgx53/Barracuda-PRYMMAL-ECE-TW3",
- "developer": "brgx53",
- "inference_platform": "unknown",
- "id": "brgx53/Barracuda-PRYMMAL-ECE-TW3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16401592219754696
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30024599561514337
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0022658610271903325
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36085416666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10929188829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_LaConfiance-PRYMMAL-ECE-TW3/f4766bd8-0130-4ed1-ae1c-8177a65d94a9.json b/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_LaConfiance-PRYMMAL-ECE-TW3/f4766bd8-0130-4ed1-ae1c-8177a65d94a9.json
deleted file mode 100644
index 912079853b0d80183b6e2c51e71a44588540524f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/brgx53/brgx53_LaConfiance-PRYMMAL-ECE-TW3/f4766bd8-0130-4ed1-ae1c-8177a65d94a9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/brgx53_LaConfiance-PRYMMAL-ECE-TW3/1762652580.041717",
- "retrieved_timestamp": "1762652580.041717",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "brgx53/LaConfiance-PRYMMAL-ECE-TW3",
- "developer": "brgx53",
- "inference_platform": "unknown",
- "id": "brgx53/LaConfiance-PRYMMAL-ECE-TW3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1579209829917951
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29624186550380993
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2516778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38457291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11461103723404255
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Blabbertron-1.0/195957fa-9d4e-49ec-afd9-17125ebcf62d.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Blabbertron-1.0/195957fa-9d4e-49ec-afd9-17125ebcf62d.json
deleted file mode 100644
index a84739d6a2cc34a6ff70e4a2ca85880700282dd6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Blabbertron-1.0/195957fa-9d4e-49ec-afd9-17125ebcf62d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Blabbertron-1.0/1762652580.0421708",
- "retrieved_timestamp": "1762652580.042172",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Blabbertron-1.0",
- "developer": "bunnycore",
- "inference_platform": "unknown",
- "id": "bunnycore/Blabbertron-1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7433376773627309
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5496552006589083
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49244712990936557
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4336875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4354222074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Blabbertron-1.1/9fbe416c-de18-4f83-812c-f48071a49917.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Blabbertron-1.1/9fbe416c-de18-4f83-812c-f48071a49917.json
deleted file mode 100644
index b1a0cdba925bc4798ab1e4a4bc727d1096998222..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Blabbertron-1.1/9fbe416c-de18-4f83-812c-f48071a49917.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Blabbertron-1.1/1762652580.0424142",
- "retrieved_timestamp": "1762652580.0424151",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Blabbertron-1.1",
- "developer": "bunnycore",
- "inference_platform": "unknown",
- "id": "bunnycore/Blabbertron-1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7265267268625026
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5534000697428705
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48036253776435045
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4415625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44306848404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_DeepThinker-7B-Sce-v1/814129ce-9101-4d9b-9e53-9161a010743f.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_DeepThinker-7B-Sce-v1/814129ce-9101-4d9b-9e53-9161a010743f.json
deleted file mode 100644
index ff9f210af59b7545820580355a336f680b5f78ae..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_DeepThinker-7B-Sce-v1/814129ce-9101-4d9b-9e53-9161a010743f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_DeepThinker-7B-Sce-v1/1762652580.043317",
- "retrieved_timestamp": "1762652580.043317",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/DeepThinker-7B-Sce-v1",
- "developer": "bunnycore",
- "inference_platform": "unknown",
- "id": "bunnycore/DeepThinker-7B-Sce-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12180015691698028
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30182806791122846
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.009818731117824773
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2516778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41942708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11228390957446809
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_DeepThinker-7B-Sce-v2/82cc30d2-9bb6-499f-b522-c66688e07c00.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_DeepThinker-7B-Sce-v2/82cc30d2-9bb6-499f-b522-c66688e07c00.json
deleted file mode 100644
index 2b800219f67ccc821b6b7c7b87d9b838d5a02c97..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_DeepThinker-7B-Sce-v2/82cc30d2-9bb6-499f-b522-c66688e07c00.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_DeepThinker-7B-Sce-v2/1762652580.0435221",
- "retrieved_timestamp": "1762652580.043523",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/DeepThinker-7B-Sce-v2",
- "developer": "bunnycore",
- "inference_platform": "unknown",
- "id": "bunnycore/DeepThinker-7B-Sce-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16306621985221434
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3056842322947901
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.011329305135951661
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4100625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11461103723404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_FuseCyberMix-Qwen-2.5-7B-Instruct/d851bc0d-5f11-40f6-982c-39809dffe946.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_FuseCyberMix-Qwen-2.5-7B-Instruct/d851bc0d-5f11-40f6-982c-39809dffe946.json
deleted file mode 100644
index a4a6c1f0e5f80517e66a477a7b2c817df8ea0146..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_FuseCyberMix-Qwen-2.5-7B-Instruct/d851bc0d-5f11-40f6-982c-39809dffe946.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_FuseCyberMix-Qwen-2.5-7B-Instruct/1762652580.043724",
- "retrieved_timestamp": "1762652580.043725",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct",
- "developer": "bunnycore",
- "inference_platform": "unknown",
- "id": "bunnycore/FuseCyberMix-Qwen-2.5-7B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7019220113742648
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5517973725429837
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48413897280966767
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40203125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43367686170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_FuseQwQen-7B/06b6f8e3-f3c7-43a6-bb69-e1eb3bd10b7a.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_FuseQwQen-7B/06b6f8e3-f3c7-43a6-bb69-e1eb3bd10b7a.json
deleted file mode 100644
index 703ab06846db9b1923292e14d267e898efcfad84..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_FuseQwQen-7B/06b6f8e3-f3c7-43a6-bb69-e1eb3bd10b7a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_FuseQwQen-7B/1762652580.0439281",
- "retrieved_timestamp": "1762652580.043929",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/FuseQwQen-7B",
- "developer": "bunnycore",
- "inference_platform": "unknown",
- "id": "bunnycore/FuseQwQen-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7274509412802475
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5504256932515404
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43655589123867067
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4216875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4406582446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Maestro-S1k-7B-Sce/cc0c2de6-5a8d-4229-bd92-a1ad0b95a6b0.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Maestro-S1k-7B-Sce/cc0c2de6-5a8d-4229-bd92-a1ad0b95a6b0.json
deleted file mode 100644
index ad156157d8711d24f70462075d8a4e93cc13c71a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Maestro-S1k-7B-Sce/cc0c2de6-5a8d-4229-bd92-a1ad0b95a6b0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Maestro-S1k-7B-Sce/1762652580.048955",
- "retrieved_timestamp": "1762652580.048955",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Maestro-S1k-7B-Sce",
- "developer": "bunnycore",
- "inference_platform": "unknown",
- "id": "bunnycore/Maestro-S1k-7B-Sce"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2522684255553044
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104380842714463
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.027945619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3768229166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11702127659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Qandora-2.5-7B-Creative/acd82774-f29a-4b19-b08c-693706bb4603.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Qandora-2.5-7B-Creative/acd82774-f29a-4b19-b08c-693706bb4603.json
deleted file mode 100644
index 23b76e19d2f85cee961b8e3164d8e452c60a6e2b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Qandora-2.5-7B-Creative/acd82774-f29a-4b19-b08c-693706bb4603.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qandora-2.5-7B-Creative/1762652580.0529459",
- "retrieved_timestamp": "1762652580.052947",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qandora-2.5-7B-Creative",
- "developer": "bunnycore",
- "inference_platform": "unknown",
- "id": "bunnycore/Qandora-2.5-7B-Creative"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6803148978044922
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5541763892398439
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30589123867069484
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4211875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4479720744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QandoraExp-7B-Persona/4e9dc7ca-f4f2-4c1f-b532-628a8d9d515b.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QandoraExp-7B-Persona/4e9dc7ca-f4f2-4c1f-b532-628a8d9d515b.json
deleted file mode 100644
index 077d5930abba8c6cbb955a64787e7f48037c2731..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QandoraExp-7B-Persona/4e9dc7ca-f4f2-4c1f-b532-628a8d9d515b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_QandoraExp-7B-Persona/1762652580.0533981",
- "retrieved_timestamp": "1762652580.053399",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/QandoraExp-7B-Persona",
- "developer": "bunnycore",
- "inference_platform": "unknown",
- "id": "bunnycore/QandoraExp-7B-Persona"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6246858335882126
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5558337526959515
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104229607250755
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145973154362416
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43715624999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44074135638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QandoraExp-7B-v2/85bc0517-382e-4a4c-ac31-ee6de74d2c8f.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QandoraExp-7B-v2/85bc0517-382e-4a4c-ac31-ee6de74d2c8f.json
deleted file mode 100644
index 1327641090433e2cd3a5cedfd0532a81876caa40..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QandoraExp-7B-v2/85bc0517-382e-4a4c-ac31-ee6de74d2c8f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_QandoraExp-7B-v2/1762652580.053621",
- "retrieved_timestamp": "1762652580.053621",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/QandoraExp-7B-v2",
- "developer": "bunnycore",
- "inference_platform": "unknown",
- "id": "bunnycore/QandoraExp-7B-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5606889719278182
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5444864824489132
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47129909365558914
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40454166666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.390874335106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QandoraExp-7B/744f9f56-fbb4-450f-9427-35e6e49ca014.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QandoraExp-7B/744f9f56-fbb4-450f-9427-35e6e49ca014.json
deleted file mode 100644
index 92af2878eb87b8036419088363cd144270e19b86..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QandoraExp-7B/744f9f56-fbb4-450f-9427-35e6e49ca014.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_QandoraExp-7B/1762652580.0531762",
- "retrieved_timestamp": "1762652580.0531762",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/QandoraExp-7B",
- "developer": "bunnycore",
- "inference_platform": "unknown",
- "id": "bunnycore/QandoraExp-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7509064836855099
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5477959748047708
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4743202416918429
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43120833333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4409906914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QwQen-3B-LCoT-R1/636c4294-b3d0-42fc-b437-e4a80f70b4d9.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QwQen-3B-LCoT-R1/636c4294-b3d0-42fc-b437-e4a80f70b4d9.json
deleted file mode 100644
index 63fd5cf9fda5179dd212007aeee3b1e0624a68e7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QwQen-3B-LCoT-R1/636c4294-b3d0-42fc-b437-e4a80f70b4d9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_QwQen-3B-LCoT-R1/1762652580.05408",
- "retrieved_timestamp": "1762652580.054081",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/QwQen-3B-LCoT-R1",
- "developer": "bunnycore",
- "inference_platform": "unknown",
- "id": "bunnycore/QwQen-3B-LCoT-R1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.534160471992092
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4798600168403517
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33534743202416917
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41384375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3723404255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.085
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QwQen-3B-LCoT/bff23021-087b-4118-ba4d-219a97a1dedc.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QwQen-3B-LCoT/bff23021-087b-4118-ba4d-219a97a1dedc.json
deleted file mode 100644
index 3652835b507237f52c46c6f4a07410a41cc50537..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_QwQen-3B-LCoT/bff23021-087b-4118-ba4d-219a97a1dedc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_QwQen-3B-LCoT/1762652580.05384",
- "retrieved_timestamp": "1762652580.0538409",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/QwQen-3B-LCoT",
- "developer": "bunnycore",
- "inference_platform": "unknown",
- "id": "bunnycore/QwQen-3B-LCoT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6025290673191577
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4899306773152123
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36178247734138974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41778125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3699301861702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Qwen2.5-7B-Instruct-Fusion/6d88de9c-062d-4858-95ef-a05f6a29b6c3.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Qwen2.5-7B-Instruct-Fusion/6d88de9c-062d-4858-95ef-a05f6a29b6c3.json
deleted file mode 100644
index fb0f96fbd83ba068d2dbc8041dea3f30afed443c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Qwen2.5-7B-Instruct-Fusion/6d88de9c-062d-4858-95ef-a05f6a29b6c3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-Instruct-Fusion/1762652580.0585442",
- "retrieved_timestamp": "1762652580.0585449",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-7B-Instruct-Fusion",
- "developer": "bunnycore",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-7B-Instruct-Fusion"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6962016338869754
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5491903018724945
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3406344410876133
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42971875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4467253989361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Qwen2.5-7B-Instruct-Merge-Stock-v0.1/fe31c10e-8231-49f4-afb3-e2588396c032.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Qwen2.5-7B-Instruct-Merge-Stock-v0.1/fe31c10e-8231-49f4-afb3-e2588396c032.json
deleted file mode 100644
index 04533db90ed6b2d6c6257b80a6d9594af2dc414b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Qwen2.5-7B-Instruct-Merge-Stock-v0.1/fe31c10e-8231-49f4-afb3-e2588396c032.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Qwen2.5-7B-Instruct-Merge-Stock-v0.1/1762652580.0587678",
- "retrieved_timestamp": "1762652580.058769",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1",
- "developer": "bunnycore",
- "inference_platform": "unknown",
- "id": "bunnycore/Qwen2.5-7B-Instruct-Merge-Stock-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7509064836855099
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5529431709465797
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48942598187311176
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42311458333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4383311170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_SmolLM2-1.7-Persona/5249691a-3672-4ccd-98dd-d9b937bca750.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_SmolLM2-1.7-Persona/5249691a-3672-4ccd-98dd-d9b937bca750.json
deleted file mode 100644
index f8892d26832ca51eebbbe1f26aa39c454c83f690..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_SmolLM2-1.7-Persona/5249691a-3672-4ccd-98dd-d9b937bca750.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_SmolLM2-1.7-Persona/1762652580.062155",
- "retrieved_timestamp": "1762652580.062156",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/SmolLM2-1.7-Persona",
- "developer": "bunnycore",
- "inference_platform": "unknown",
- "id": "bunnycore/SmolLM2-1.7-Persona"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5465254413844156
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3623213930905173
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05664652567975831
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.334125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1973902925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.711
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_SmolLM2-1.7B-roleplay-lora/ae109e51-8631-4e09-8839-8e9ed74da4c7.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_SmolLM2-1.7B-roleplay-lora/ae109e51-8631-4e09-8839-8e9ed74da4c7.json
deleted file mode 100644
index 8c4d7eec661f8a7cf3afe486ae1ed964f9189d48..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_SmolLM2-1.7B-roleplay-lora/ae109e51-8631-4e09-8839-8e9ed74da4c7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_SmolLM2-1.7B-roleplay-lora/1762652580.062429",
- "retrieved_timestamp": "1762652580.06243",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/SmolLM2-1.7B-roleplay-lora",
- "developer": "bunnycore",
- "inference_platform": "unknown",
- "id": "bunnycore/SmolLM2-1.7B-roleplay-lora"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5382075116247114
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3610343412303005
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.052870090634441085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33945833333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19664228723404256
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "?",
- "params_billions": 3.423
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Tulu-3.1-8B-SuperNova/cd979586-e334-4964-b06c-f33c66f09c0e.json b/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Tulu-3.1-8B-SuperNova/cd979586-e334-4964-b06c-f33c66f09c0e.json
deleted file mode 100644
index 0de3813bcaf001e1a465ee5b7a5ac775f819996a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/bunnycore/bunnycore_Tulu-3.1-8B-SuperNova/cd979586-e334-4964-b06c-f33c66f09c0e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Tulu-3.1-8B-SuperNova/1762652580.062763",
- "retrieved_timestamp": "1762652580.0627651",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Tulu-3.1-8B-SuperNova",
- "developer": "bunnycore",
- "inference_platform": "unknown",
- "id": "bunnycore/Tulu-3.1-8B-SuperNova"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8193748143813969
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5254122754311122
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24622356495468278
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3935
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3813996010638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/byroneverson/byroneverson_Mistral-Small-Instruct-2409-abliterated/ff0c627b-72b9-45d4-a385-49c8b0ae6b6e.json b/leaderboard_data/HFOpenLLMv2/byroneverson/byroneverson_Mistral-Small-Instruct-2409-abliterated/ff0c627b-72b9-45d4-a385-49c8b0ae6b6e.json
deleted file mode 100644
index 0b0eb1a5aec8f57cd12de75b34a91384632effdf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/byroneverson/byroneverson_Mistral-Small-Instruct-2409-abliterated/ff0c627b-72b9-45d4-a385-49c8b0ae6b6e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/byroneverson_Mistral-Small-Instruct-2409-abliterated/1762652580.063036",
- "retrieved_timestamp": "1762652580.063037",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "byroneverson/Mistral-Small-Instruct-2409-abliterated",
- "developer": "byroneverson",
- "inference_platform": "unknown",
- "id": "byroneverson/Mistral-Small-Instruct-2409-abliterated"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6970759806203096
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5237864400325174
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24773413897280966
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33305369127516776
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36971875000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39228723404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 22.247
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/byroneverson/byroneverson_Yi-1.5-9B-Chat-16K-abliterated/dc783bb0-c784-4cf4-888b-36a3bfa37a84.json b/leaderboard_data/HFOpenLLMv2/byroneverson/byroneverson_Yi-1.5-9B-Chat-16K-abliterated/dc783bb0-c784-4cf4-888b-36a3bfa37a84.json
deleted file mode 100644
index 1e0526c1d43ce59e4fc33663093ca0deb8f84f38..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/byroneverson/byroneverson_Yi-1.5-9B-Chat-16K-abliterated/dc783bb0-c784-4cf4-888b-36a3bfa37a84.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/byroneverson_Yi-1.5-9B-Chat-16K-abliterated/1762652580.068388",
- "retrieved_timestamp": "1762652580.068392",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "byroneverson/Yi-1.5-9B-Chat-16K-abliterated",
- "developer": "byroneverson",
- "inference_platform": "unknown",
- "id": "byroneverson/Yi-1.5-9B-Chat-16K-abliterated"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5528453392553979
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5282050829986801
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14123867069486404
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4734375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38231382978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.829
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/byroneverson/byroneverson_Yi-1.5-9B-Chat-abliterated/345560e2-c981-4aca-9388-4f3a5e95ace8.json b/leaderboard_data/HFOpenLLMv2/byroneverson/byroneverson_Yi-1.5-9B-Chat-abliterated/345560e2-c981-4aca-9388-4f3a5e95ace8.json
deleted file mode 100644
index 985fb6bf85fb7714163adb35915336090add81bb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/byroneverson/byroneverson_Yi-1.5-9B-Chat-abliterated/345560e2-c981-4aca-9388-4f3a5e95ace8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/byroneverson_Yi-1.5-9B-Chat-abliterated/1762652580.070213",
- "retrieved_timestamp": "1762652580.070215",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "byroneverson/Yi-1.5-9B-Chat-abliterated",
- "developer": "byroneverson",
- "inference_platform": "unknown",
- "id": "byroneverson/Yi-1.5-9B-Chat-abliterated"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5723291976400395
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5401219363002313
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1661631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43886458333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3715093085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.829
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/c10x/c10x_Q-Pluse/2093ba5f-d2f8-45d2-bcf7-ff48810c47af.json b/leaderboard_data/HFOpenLLMv2/c10x/c10x_Q-Pluse/2093ba5f-d2f8-45d2-bcf7-ff48810c47af.json
deleted file mode 100644
index 5680463d07d45110676ddb5c93086563359625f5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/c10x/c10x_Q-Pluse/2093ba5f-d2f8-45d2-bcf7-ff48810c47af.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/c10x_Q-Pluse/1762652580.070795",
- "retrieved_timestamp": "1762652580.070796",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "c10x/Q-Pluse",
- "developer": "c10x",
- "inference_platform": "unknown",
- "id": "c10x/Q-Pluse"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11228318638988993
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2875111436321769
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24664429530201343
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39381249999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11353058510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/c10x/c10x_longthinker/fe7bd3bb-71a4-46dd-a86d-b5a24b685fa5.json b/leaderboard_data/HFOpenLLMv2/c10x/c10x_longthinker/fe7bd3bb-71a4-46dd-a86d-b5a24b685fa5.json
deleted file mode 100644
index 325dee7447e8815fe1fb725e9b5ef3de1cc8c2c4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/c10x/c10x_longthinker/fe7bd3bb-71a4-46dd-a86d-b5a24b685fa5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/c10x_longthinker/1762652580.078971",
- "retrieved_timestamp": "1762652580.078974",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "c10x/longthinker",
- "developer": "c10x",
- "inference_platform": "unknown",
- "id": "c10x/longthinker"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36087913403103766
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49274888053364546
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23187311178247735
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3909583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3527260638297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/carsenk/carsenk_flippa-v6/a4bcc6f3-b745-48f7-a394-90cd42363aae.json b/leaderboard_data/HFOpenLLMv2/carsenk/carsenk_flippa-v6/a4bcc6f3-b745-48f7-a394-90cd42363aae.json
deleted file mode 100644
index 2843e36d2b4f05061b352479dfe17e4dc640ef8c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/carsenk/carsenk_flippa-v6/a4bcc6f3-b745-48f7-a394-90cd42363aae.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/carsenk_flippa-v6/1762652580.079394",
- "retrieved_timestamp": "1762652580.079395",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "carsenk/flippa-v6",
- "developer": "carsenk",
- "inference_platform": "unknown",
- "id": "carsenk/flippa-v6"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3439429602344003
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5046972457053399
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1404833836858006
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40887500000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3667719414893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "?",
- "params_billions": 16.061
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/cgato/cgato_TheSalt-L3-8b-v0.3.2/aa805bcc-3847-40b5-86eb-397982106d18.json b/leaderboard_data/HFOpenLLMv2/cgato/cgato_TheSalt-L3-8b-v0.3.2/aa805bcc-3847-40b5-86eb-397982106d18.json
deleted file mode 100644
index 4b82dee13283ae206ac1e53c3fb6f7298f8c8128..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/cgato/cgato_TheSalt-L3-8b-v0.3.2/aa805bcc-3847-40b5-86eb-397982106d18.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/cgato_TheSalt-L3-8b-v0.3.2/1762652580.100134",
- "retrieved_timestamp": "1762652580.100136",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "cgato/TheSalt-L3-8b-v0.3.2",
- "developer": "cgato",
- "inference_platform": "unknown",
- "id": "cgato/TheSalt-L3-8b-v0.3.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27050337548814923
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29679653176003074
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04758308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38962499999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11394614361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/chujiezheng/chujiezheng_Llama-3-Instruct-8B-SimPO-ExPO/bdf85c5c-6eaa-4df6-a393-66b71aa28952.json b/leaderboard_data/HFOpenLLMv2/chujiezheng/chujiezheng_Llama-3-Instruct-8B-SimPO-ExPO/bdf85c5c-6eaa-4df6-a393-66b71aa28952.json
deleted file mode 100644
index ec1f92016b66db0c6dc4994cb119ee67bd15e83c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/chujiezheng/chujiezheng_Llama-3-Instruct-8B-SimPO-ExPO/bdf85c5c-6eaa-4df6-a393-66b71aa28952.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/chujiezheng_Llama-3-Instruct-8B-SimPO-ExPO/1762652580.1008909",
- "retrieved_timestamp": "1762652580.100893",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO",
- "developer": "chujiezheng",
- "inference_platform": "unknown",
- "id": "chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6433707008515184
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4764515968840137
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0702416918429003
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3920104166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.340093085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/cjvt/cjvt_GaMS-1B/e9acbb25-2b96-4a2a-92ff-d2b68c0e49f8.json b/leaderboard_data/HFOpenLLMv2/cjvt/cjvt_GaMS-1B/e9acbb25-2b96-4a2a-92ff-d2b68c0e49f8.json
deleted file mode 100644
index 507e6b26125eda2cd40974598b9d8a2f4fd72931..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/cjvt/cjvt_GaMS-1B/e9acbb25-2b96-4a2a-92ff-d2b68c0e49f8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/cjvt_GaMS-1B/1762652580.101496",
- "retrieved_timestamp": "1762652580.1014972",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "cjvt/GaMS-1B",
- "developer": "cjvt",
- "inference_platform": "unknown",
- "id": "cjvt/GaMS-1B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.163541625110263
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3074752552734472
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.013595166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36841666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11486037234042554
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "OPTForCausalLM",
- "params_billions": 1.54
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Mixtral_11Bx2_MoE_19B/9be76c82-0f70-4b76-8476-7707d4da85bb.json b/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Mixtral_11Bx2_MoE_19B/9be76c82-0f70-4b76-8476-7707d4da85bb.json
deleted file mode 100644
index d617801d92c79b8040c10a694e07653608bada50..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Mixtral_11Bx2_MoE_19B/9be76c82-0f70-4b76-8476-7707d4da85bb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/cloudyu_Mixtral_11Bx2_MoE_19B/1762652580.102268",
- "retrieved_timestamp": "1762652580.102269",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "cloudyu/Mixtral_11Bx2_MoE_19B",
- "developer": "cloudyu",
- "inference_platform": "unknown",
- "id": "cloudyu/Mixtral_11Bx2_MoE_19B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3850837998732253
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5208516020145867
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06722054380664652
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4296875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33111702127659576
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 19.188
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Mixtral_34Bx2_MoE_60B/fdbef33b-dffb-4146-bc83-f8b03c842b2e.json b/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Mixtral_34Bx2_MoE_60B/fdbef33b-dffb-4146-bc83-f8b03c842b2e.json
deleted file mode 100644
index 8e537411a1909632e9c3bfcb55feb8f143241c50..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Mixtral_34Bx2_MoE_60B/fdbef33b-dffb-4146-bc83-f8b03c842b2e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/cloudyu_Mixtral_34Bx2_MoE_60B/1762652580.102543",
- "retrieved_timestamp": "1762652580.1025438",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "cloudyu/Mixtral_34Bx2_MoE_60B",
- "developer": "cloudyu",
- "inference_platform": "unknown",
- "id": "cloudyu/Mixtral_34Bx2_MoE_60B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4537770892343427
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5869701263465353
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0770392749244713
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4625208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47664561170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 60.814
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Mixtral_7Bx2_MoE/b6c048f5-b01e-4e51-8a6c-c068dfd199ef.json b/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Mixtral_7Bx2_MoE/b6c048f5-b01e-4e51-8a6c-c068dfd199ef.json
deleted file mode 100644
index 584a9d460146bfb557e1fbf875b1199efd1895ab..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Mixtral_7Bx2_MoE/b6c048f5-b01e-4e51-8a6c-c068dfd199ef.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/cloudyu_Mixtral_7Bx2_MoE/1762652580.102766",
- "retrieved_timestamp": "1762652580.102767",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "cloudyu/Mixtral_7Bx2_MoE",
- "developer": "cloudyu",
- "inference_platform": "unknown",
- "id": "cloudyu/Mixtral_7Bx2_MoE"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4480068440626427
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5159732691655027
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06873111782477341
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44729166666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30435505319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 12.879
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Yi-34Bx2-MoE-60B-DPO/542d450b-8108-4abe-a2ae-5b9a577558d6.json b/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Yi-34Bx2-MoE-60B-DPO/542d450b-8108-4abe-a2ae-5b9a577558d6.json
deleted file mode 100644
index 4c50c883b0be9f993bb07c5384aa76117040b245..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/cloudyu/cloudyu_Yi-34Bx2-MoE-60B-DPO/542d450b-8108-4abe-a2ae-5b9a577558d6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/cloudyu_Yi-34Bx2-MoE-60B-DPO/1762652580.108832",
- "retrieved_timestamp": "1762652580.1088362",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "cloudyu/Yi-34Bx2-MoE-60B-DPO",
- "developer": "cloudyu",
- "inference_platform": "unknown",
- "id": "cloudyu/Yi-34Bx2-MoE-60B-DPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.531887613753729
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.516831447641953
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0702416918429003
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3221476510067114
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43746875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46766954787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 60.814
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/cpayne1303/cpayne1303_cp2024-instruct/247e1c1e-ce27-4645-a2ae-4177f08ea4a5.json b/leaderboard_data/HFOpenLLMv2/cpayne1303/cpayne1303_cp2024-instruct/247e1c1e-ce27-4645-a2ae-4177f08ea4a5.json
deleted file mode 100644
index 0d50fb2bcf0251bf2190686337c2cb983c5f25ef..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/cpayne1303/cpayne1303_cp2024-instruct/247e1c1e-ce27-4645-a2ae-4177f08ea4a5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/cpayne1303_cp2024-instruct/1762652580.116854",
- "retrieved_timestamp": "1762652580.116854",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "cpayne1303/cp2024-instruct",
- "developer": "cpayne1303",
- "inference_platform": "unknown",
- "id": "cpayne1303/cp2024-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17061064641817045
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2946778102988436
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3686354166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11668882978723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.031
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/cpayne1303/cpayne1303_cp2024/2bfb7bea-a344-4249-8bdc-e6c483518df5.json b/leaderboard_data/HFOpenLLMv2/cpayne1303/cpayne1303_cp2024/2bfb7bea-a344-4249-8bdc-e6c483518df5.json
deleted file mode 100644
index 5e4f63731aaa22e6978744758314c68cf8e29a1a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/cpayne1303/cpayne1303_cp2024/2bfb7bea-a344-4249-8bdc-e6c483518df5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/cpayne1303_cp2024/1762652580.116582",
- "retrieved_timestamp": "1762652580.1165829",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "cpayne1303/cp2024",
- "developer": "cpayne1303",
- "inference_platform": "unknown",
- "id": "cpayne1303/cp2024"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16581448334862608
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29853854089245085
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.005287009063444109
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2558724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3383125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11012300531914894
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.031
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/cpayne1303/cpayne1303_smallcp2024/fcbede38-3a5b-4cd7-b144-cbf26cc05df9.json b/leaderboard_data/HFOpenLLMv2/cpayne1303/cpayne1303_smallcp2024/fcbede38-3a5b-4cd7-b144-cbf26cc05df9.json
deleted file mode 100644
index 295373ed1dcac98484a94e457a3708494aaa27f1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/cpayne1303/cpayne1303_smallcp2024/fcbede38-3a5b-4cd7-b144-cbf26cc05df9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/cpayne1303_smallcp2024/1762652580.117528",
- "retrieved_timestamp": "1762652580.117528",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "cpayne1303/smallcp2024",
- "developer": "cpayne1303",
- "inference_platform": "unknown",
- "id": "cpayne1303/smallcp2024"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1581958093414363
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3027047714604053
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.005287009063444109
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23070469798657717
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34246874999999993
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11136968085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.002
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/crestf411/crestf411_MN-Slush/b32a7808-7a64-41a8-aad4-030efc512906.json b/leaderboard_data/HFOpenLLMv2/crestf411/crestf411_MN-Slush/b32a7808-7a64-41a8-aad4-030efc512906.json
deleted file mode 100644
index 014effd43be8f804c95e7f1d032548f716682df0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/crestf411/crestf411_MN-Slush/b32a7808-7a64-41a8-aad4-030efc512906.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/crestf411_MN-Slush/1762652580.117737",
- "retrieved_timestamp": "1762652580.117738",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "crestf411/MN-Slush",
- "developer": "crestf411",
- "inference_platform": "unknown",
- "id": "crestf411/MN-Slush"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4077148632295642
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5340014235282594
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1268882175226586
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39328125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3508144946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/cyberagent/cyberagent_calm3-22b-chat/b7ce290d-d082-4586-ac4b-516e8130ddc2.json b/leaderboard_data/HFOpenLLMv2/cyberagent/cyberagent_calm3-22b-chat/b7ce290d-d082-4586-ac4b-516e8130ddc2.json
deleted file mode 100644
index ed290f1f1763b0416a5e215b1f9d730b944fdd0c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/cyberagent/cyberagent_calm3-22b-chat/b7ce290d-d082-4586-ac4b-516e8130ddc2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/cyberagent_calm3-22b-chat/1762652580.118237",
- "retrieved_timestamp": "1762652580.118238",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "cyberagent/calm3-22b-chat",
- "developer": "cyberagent",
- "inference_platform": "unknown",
- "id": "cyberagent/calm3-22b-chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.509131327100981
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4991683247746046
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06948640483383686
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27684563758389263
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45532291666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29496343085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 22.543
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/darkc0de/darkc0de_BuddyGlassNeverSleeps/675f6dfe-c623-4694-94cb-8705aab5521f.json b/leaderboard_data/HFOpenLLMv2/darkc0de/darkc0de_BuddyGlassNeverSleeps/675f6dfe-c623-4694-94cb-8705aab5521f.json
deleted file mode 100644
index 66e7450af41194e8b3d0fbb45e501edc0da300db..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/darkc0de/darkc0de_BuddyGlassNeverSleeps/675f6dfe-c623-4694-94cb-8705aab5521f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/darkc0de_BuddyGlassNeverSleeps/1762652580.1184928",
- "retrieved_timestamp": "1762652580.118494",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "darkc0de/BuddyGlassNeverSleeps",
- "developer": "darkc0de",
- "inference_platform": "unknown",
- "id": "darkc0de/BuddyGlassNeverSleeps"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4239019135892764
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49772281653646816
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06268882175226587
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3992708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34524601063829785
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/darkc0de/darkc0de_BuddyGlassUncensored2025.2/ea8dfb5f-750d-4573-a2bb-dadafc3a73b7.json b/leaderboard_data/HFOpenLLMv2/darkc0de/darkc0de_BuddyGlassUncensored2025.2/ea8dfb5f-750d-4573-a2bb-dadafc3a73b7.json
deleted file mode 100644
index f2d90db12d565da7187b8c11b95ab773c72a90f3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/darkc0de/darkc0de_BuddyGlassUncensored2025.2/ea8dfb5f-750d-4573-a2bb-dadafc3a73b7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/darkc0de_BuddyGlassUncensored2025.2/1762652580.118735",
- "retrieved_timestamp": "1762652580.1187358",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "darkc0de/BuddyGlassUncensored2025.2",
- "developer": "darkc0de",
- "inference_platform": "unknown",
- "id": "darkc0de/BuddyGlassUncensored2025.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7731131176389756
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6095411371819216
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24018126888217523
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4070833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43359375
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.306
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/darkc0de/darkc0de_BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/adf85459-eba0-48a8-ad54-1e17d1ea5b31.json b/leaderboard_data/HFOpenLLMv2/darkc0de/darkc0de_BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/adf85459-eba0-48a8-ad54-1e17d1ea5b31.json
deleted file mode 100644
index 5149d1b9e15688ffca5b8814887afe3a3e1a0601..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/darkc0de/darkc0de_BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/adf85459-eba0-48a8-ad54-1e17d1ea5b31.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/darkc0de_BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp/1762652580.1189609",
- "retrieved_timestamp": "1762652580.1189609",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp",
- "developer": "darkc0de",
- "inference_platform": "unknown",
- "id": "darkc0de/BuddyGlass_v0.3_Xortron7MethedUpSwitchedUp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43584245357872664
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5243087998656722
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1283987915407855
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4143333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36727061170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.007
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dbrx-base/17febb53-0735-4983-8049-85319818ab84.json b/leaderboard_data/HFOpenLLMv2/databricks/databricks_dbrx-base/17febb53-0735-4983-8049-85319818ab84.json
deleted file mode 100644
index e09c2c814fe12578c1ff937ff44858b218307db0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dbrx-base/17febb53-0735-4983-8049-85319818ab84.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/databricks_dbrx-base/1762652580.1191711",
- "retrieved_timestamp": "1762652580.1191711",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "databricks/dbrx-base",
- "developer": "databricks",
- "inference_platform": "unknown",
- "id": "databricks/dbrx-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08214723926380368
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5195833333333334
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32666666666666666
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4066666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Unknown",
- "params_billions": 0.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dbrx-instruct/639e4921-9fa8-446d-b539-f03a7589b142.json b/leaderboard_data/HFOpenLLMv2/databricks/databricks_dbrx-instruct/639e4921-9fa8-446d-b539-f03a7589b142.json
deleted file mode 100644
index 73288ef798a6d4ba2caf8278dc3f9053130cd52f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dbrx-instruct/639e4921-9fa8-446d-b539-f03a7589b142.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/databricks_dbrx-instruct/1762652580.119466",
- "retrieved_timestamp": "1762652580.119467",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "databricks/dbrx-instruct",
- "developer": "databricks",
- "inference_platform": "unknown",
- "id": "databricks/dbrx-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5415796752616391
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5428960796934387
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06873111782477341
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3414429530201342
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42692708333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36826795212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "DbrxForCausalLM",
- "params_billions": 131.597
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v1-6b/62299ec1-dd42-4751-a224-3bdda71d3cdf.json b/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v1-6b/62299ec1-dd42-4751-a224-3bdda71d3cdf.json
deleted file mode 100644
index 2d277c2d89a39fe15e77251089ed40c168fb8464..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v1-6b/62299ec1-dd42-4751-a224-3bdda71d3cdf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/databricks_dolly-v1-6b/1762652580.1196742",
- "retrieved_timestamp": "1762652580.119675",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "databricks/dolly-v1-6b",
- "developer": "databricks",
- "inference_platform": "unknown",
- "id": "databricks/dolly-v1-6b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22244311759464885
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3172089528774696
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0188821752265861
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40041666666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12657912234042554
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GPTJForCausalLM",
- "params_billions": 6.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v2-12b/c83e2bf0-5d4e-45c4-aff2-27aea2bc0fb6.json b/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v2-12b/c83e2bf0-5d4e-45c4-aff2-27aea2bc0fb6.json
deleted file mode 100644
index 6e9ddbcf465b8cbc27ce324123069aa3eeab0e43..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v2-12b/c83e2bf0-5d4e-45c4-aff2-27aea2bc0fb6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/databricks_dolly-v2-12b/1762652580.1198819",
- "retrieved_timestamp": "1762652580.119883",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "databricks/dolly-v2-12b",
- "developer": "databricks",
- "inference_platform": "unknown",
- "id": "databricks/dolly-v2-12b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23550734273948679
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33199731673771277
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.013595166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2407718120805369
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37390625000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11286569148936171
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GPTNeoXForCausalLM",
- "params_billions": 12.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v2-3b/a8838707-f188-440e-801f-e780e0dd362a.json b/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v2-3b/a8838707-f188-440e-801f-e780e0dd362a.json
deleted file mode 100644
index 389c515efd0b149289d0424bdcf9df4e347ba3a4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v2-3b/a8838707-f188-440e-801f-e780e0dd362a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/databricks_dolly-v2-3b/1762652580.1200871",
- "retrieved_timestamp": "1762652580.1200871",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "databricks/dolly-v2-3b",
- "developer": "databricks",
- "inference_platform": "unknown",
- "id": "databricks/dolly-v2-3b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22471597583301195
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30792785961544844
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015105740181268883
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33378125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11452792553191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GPTNeoXForCausalLM",
- "params_billions": 3.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v2-7b/68f999d7-2dc2-4b3c-ab02-6140387893c0.json b/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v2-7b/68f999d7-2dc2-4b3c-ab02-6140387893c0.json
deleted file mode 100644
index 83c7f17dfaace5736ae90d3da97bec65ad5b046c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/databricks/databricks_dolly-v2-7b/68f999d7-2dc2-4b3c-ab02-6140387893c0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/databricks_dolly-v2-7b/1762652580.120286",
- "retrieved_timestamp": "1762652580.120287",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "databricks/dolly-v2-7b",
- "developer": "databricks",
- "inference_platform": "unknown",
- "id": "databricks/dolly-v2-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2009856070781083
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31730628122070326
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.014350453172205438
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35530208333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1149434840425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GPTNeoXForCausalLM",
- "params_billions": 7.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/davidkim205/davidkim205_Rhea-72b-v0.5/106de4e2-a8d3-40d3-bdbc-0b95930e9ba6.json b/leaderboard_data/HFOpenLLMv2/davidkim205/davidkim205_Rhea-72b-v0.5/106de4e2-a8d3-40d3-bdbc-0b95930e9ba6.json
deleted file mode 100644
index 91c621166bae059b694e8bc17f674838102d9158..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/davidkim205/davidkim205_Rhea-72b-v0.5/106de4e2-a8d3-40d3-bdbc-0b95930e9ba6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/davidkim205_Rhea-72b-v0.5/1762652580.1208682",
- "retrieved_timestamp": "1762652580.1208699",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "davidkim205/Rhea-72b-v0.5",
- "developer": "davidkim205",
- "inference_platform": "unknown",
- "id": "davidkim205/Rhea-72b-v0.5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.014538092261865185
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30783395929068597
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17371601208459214
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2525167785234899
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42413541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11660571808510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 72.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/davidkim205/davidkim205_nox-solar-10.7b-v4/fcc755d0-6269-49e6-890b-4a14417601a1.json b/leaderboard_data/HFOpenLLMv2/davidkim205/davidkim205_nox-solar-10.7b-v4/fcc755d0-6269-49e6-890b-4a14417601a1.json
deleted file mode 100644
index 31d77ef786c94624460d1116dfb7ca207d3dc822..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/davidkim205/davidkim205_nox-solar-10.7b-v4/fcc755d0-6269-49e6-890b-4a14417601a1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/davidkim205_nox-solar-10.7b-v4/1762652580.1212",
- "retrieved_timestamp": "1762652580.1212008",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "davidkim205/nox-solar-10.7b-v4",
- "developer": "davidkim205",
- "inference_platform": "unknown",
- "id": "davidkim205/nox-solar-10.7b-v4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3753418706809044
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4814038018918371
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.008308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42984375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3332779255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.732
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-llm-67b-chat/eeea1c5c-bf81-4533-aace-ccb85153320f.json b/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-llm-67b-chat/eeea1c5c-bf81-4533-aace-ccb85153320f.json
deleted file mode 100644
index 056c4fa8610d864e25b36d0db953881ebd01649f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-llm-67b-chat/eeea1c5c-bf81-4533-aace-ccb85153320f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-llm-67b-chat/1762652580.1230679",
- "retrieved_timestamp": "1762652580.1230688",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "deepseek-ai/deepseek-llm-67b-chat",
- "developer": "deepseek-ai",
- "inference_platform": "unknown",
- "id": "deepseek-ai/deepseek-llm-67b-chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5587153197959193
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5243416179742358
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09290030211480363
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5058645833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3943650265957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 67.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-llm-7b-base/e11d46c2-c121-4c74-94ae-e6ec9a5898af.json b/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-llm-7b-base/e11d46c2-c121-4c74-94ae-e6ec9a5898af.json
deleted file mode 100644
index 47dcc8706007115b9f8025b27bb373f546a9304c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-llm-7b-base/e11d46c2-c121-4c74-94ae-e6ec9a5898af.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-llm-7b-base/1762652580.1234062",
- "retrieved_timestamp": "1762652580.1234071",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "deepseek-ai/deepseek-llm-7b-base",
- "developer": "deepseek-ai",
- "inference_platform": "unknown",
- "id": "deepseek-ai/deepseek-llm-7b-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.217871913190335
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35030315829299524
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.019637462235649546
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37378124999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18060172872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 7.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-llm-7b-chat/b9dd96f5-6ab0-4df4-9ee2-bd34c4c9fb05.json b/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-llm-7b-chat/b9dd96f5-6ab0-4df4-9ee2-bd34c4c9fb05.json
deleted file mode 100644
index aade62d711ddb6c37b8621a550081f298ea8af08..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-llm-7b-chat/b9dd96f5-6ab0-4df4-9ee2-bd34c4c9fb05.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-llm-7b-chat/1762652580.123629",
- "retrieved_timestamp": "1762652580.12363",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "deepseek-ai/deepseek-llm-7b-chat",
- "developer": "deepseek-ai",
- "inference_platform": "unknown",
- "id": "deepseek-ai/deepseek-llm-7b-chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4170822307034225
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3632079760108669
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02039274924471299
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46677083333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21334773936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 7.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-moe-16b-base/32767af1-f01b-42ca-a8e2-6fecc5af4bfc.json b/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-moe-16b-base/32767af1-f01b-42ca-a8e2-6fecc5af4bfc.json
deleted file mode 100644
index 01dd2b9ecfac46392a8c10a42e6e87a078e4f1b9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-moe-16b-base/32767af1-f01b-42ca-a8e2-6fecc5af4bfc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-moe-16b-base/1762652580.123848",
- "retrieved_timestamp": "1762652580.123849",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "deepseek-ai/deepseek-moe-16b-base",
- "developer": "deepseek-ai",
- "inference_platform": "unknown",
- "id": "deepseek-ai/deepseek-moe-16b-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2449744455821664
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3409461055246395
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02416918429003021
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25419463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36578125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1505152925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "DeepseekForCausalLM",
- "params_billions": 16.376
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-moe-16b-chat/81c514f2-5a06-4d50-8c00-dc8b97529f46.json b/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-moe-16b-chat/81c514f2-5a06-4d50-8c00-dc8b97529f46.json
deleted file mode 100644
index ba3087ba1181987d93b3800330afc2543d234dac..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/deepseek-ai/deepseek-ai_deepseek-moe-16b-chat/81c514f2-5a06-4d50-8c00-dc8b97529f46.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/deepseek-ai_deepseek-moe-16b-chat/1762652580.1240609",
- "retrieved_timestamp": "1762652580.124062",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "deepseek-ai/deepseek-moe-16b-chat",
- "developer": "deepseek-ai",
- "inference_platform": "unknown",
- "id": "deepseek-ai/deepseek-moe-16b-chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36629919724109805
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3274953026448241
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0256797583081571
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22483221476510068
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38076041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1963929521276596
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "DeepseekForCausalLM",
- "params_billions": 16.376
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dfurman/dfurman_CalmeRys-78B-Orpo-v0.1/31d8cf18-7b35-438e-8dc6-cdba0f593348.json b/leaderboard_data/HFOpenLLMv2/dfurman/dfurman_CalmeRys-78B-Orpo-v0.1/31d8cf18-7b35-438e-8dc6-cdba0f593348.json
deleted file mode 100644
index 25f02e5af468c6aad25a9be8bcc543c3dc84d37f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dfurman/dfurman_CalmeRys-78B-Orpo-v0.1/31d8cf18-7b35-438e-8dc6-cdba0f593348.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dfurman_CalmeRys-78B-Orpo-v0.1/1762652580.124436",
- "retrieved_timestamp": "1762652580.124437",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dfurman/CalmeRys-78B-Orpo-v0.1",
- "developer": "dfurman",
- "inference_platform": "unknown",
- "id": "dfurman/CalmeRys-78B-Orpo-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8163273447785211
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7262282792249927
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40634441087613293
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4001677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5901770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7012134308510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 77.965
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dicta-il/dicta-il_dictalm2.0-instruct/4fc01471-7a04-4f46-a973-42f5a3fd67be.json b/leaderboard_data/HFOpenLLMv2/dicta-il/dicta-il_dictalm2.0-instruct/4fc01471-7a04-4f46-a973-42f5a3fd67be.json
deleted file mode 100644
index 292a1e9e36cda4e042ba96162c77f23f5ef5d27e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dicta-il/dicta-il_dictalm2.0-instruct/4fc01471-7a04-4f46-a973-42f5a3fd67be.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dicta-il_dictalm2.0-instruct/1762652580.126274",
- "retrieved_timestamp": "1762652580.126276",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dicta-il/dictalm2.0-instruct",
- "developer": "dicta-il",
- "inference_platform": "unknown",
- "id": "dicta-il/dictalm2.0-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44121264910437635
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42560784985912875
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.022658610271903322
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39458333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2604720744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.251
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dicta-il/dicta-il_dictalm2.0/613c1922-270a-4e8b-ae9d-20fa25573258.json b/leaderboard_data/HFOpenLLMv2/dicta-il/dicta-il_dictalm2.0/613c1922-270a-4e8b-ae9d-20fa25573258.json
deleted file mode 100644
index 56fed06a5aa3108335f8dba330a1f382fc01a2f2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dicta-il/dicta-il_dictalm2.0/613c1922-270a-4e8b-ae9d-20fa25573258.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dicta-il_dictalm2.0/1762652580.125907",
- "retrieved_timestamp": "1762652580.125909",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dicta-il/dictalm2.0",
- "developer": "dicta-il",
- "inference_platform": "unknown",
- "id": "dicta-il/dictalm2.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24132745559559746
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4017869112495909
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01812688821752266
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38196874999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2604720744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.251
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/divyanshukunwar/divyanshukunwar_SASTRI_1_9B/f0ccf0c5-269f-46e1-a13e-b54f2903779b.json b/leaderboard_data/HFOpenLLMv2/divyanshukunwar/divyanshukunwar_SASTRI_1_9B/f0ccf0c5-269f-46e1-a13e-b54f2903779b.json
deleted file mode 100644
index 443b8a915bd2d62bb14d0ab7f336e9c14b09c315..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/divyanshukunwar/divyanshukunwar_SASTRI_1_9B/f0ccf0c5-269f-46e1-a13e-b54f2903779b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/divyanshukunwar_SASTRI_1_9B/1762652580.1269271",
- "retrieved_timestamp": "1762652580.1269279",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "divyanshukunwar/SASTRI_1_9B",
- "developer": "divyanshukunwar",
- "inference_platform": "unknown",
- "id": "divyanshukunwar/SASTRI_1_9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4207292206899914
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4680499051118341
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11555891238670694
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3831145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3187333776595745
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 5.211
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/djuna-test-lab/djuna-test-lab_TEST-L3.2-ReWish-3B-ties-w-base/f64d7325-38eb-4cd4-80b3-bd63d4acb72f.json b/leaderboard_data/HFOpenLLMv2/djuna-test-lab/djuna-test-lab_TEST-L3.2-ReWish-3B-ties-w-base/f64d7325-38eb-4cd4-80b3-bd63d4acb72f.json
deleted file mode 100644
index e9a4c922f5e5e894239200083c8e052a8101a59e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/djuna-test-lab/djuna-test-lab_TEST-L3.2-ReWish-3B-ties-w-base/f64d7325-38eb-4cd4-80b3-bd63d4acb72f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/djuna-test-lab_TEST-L3.2-ReWish-3B-ties-w-base/1762652580.131253",
- "retrieved_timestamp": "1762652580.131254",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base",
- "developer": "djuna-test-lab",
- "inference_platform": "unknown",
- "id": "djuna-test-lab/TEST-L3.2-ReWish-3B-ties-w-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.635252241829457
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.449540552927623
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13670694864048338
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37775
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31258311170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/djuna-test-lab/djuna-test-lab_TEST-L3.2-ReWish-3B/6d57a63e-0fa7-442b-9156-5a8985e04762.json b/leaderboard_data/HFOpenLLMv2/djuna-test-lab/djuna-test-lab_TEST-L3.2-ReWish-3B/6d57a63e-0fa7-442b-9156-5a8985e04762.json
deleted file mode 100644
index 1cea8806eb208f1fedb53210dfe765fb486523a9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/djuna-test-lab/djuna-test-lab_TEST-L3.2-ReWish-3B/6d57a63e-0fa7-442b-9156-5a8985e04762.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/djuna-test-lab_TEST-L3.2-ReWish-3B/1762652580.131",
- "retrieved_timestamp": "1762652580.131001",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "djuna-test-lab/TEST-L3.2-ReWish-3B",
- "developer": "djuna-test-lab",
- "inference_platform": "unknown",
- "id": "djuna-test-lab/TEST-L3.2-ReWish-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6367759766308949
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.449540552927623
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13670694864048338
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37775
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31258311170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_G2-BigGSHT-27B-2/69cc67cc-52f9-464a-ab04-b00bb3d8c459.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_G2-BigGSHT-27B-2/69cc67cc-52f9-464a-ab04-b00bb3d8c459.json
deleted file mode 100644
index 03d916782d7f3bb86cbd55bb5e77e9eac64992f6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_G2-BigGSHT-27B-2/69cc67cc-52f9-464a-ab04-b00bb3d8c459.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/djuna_G2-BigGSHT-27B-2/1762652580.1272058",
- "retrieved_timestamp": "1762652580.1272068",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "djuna/G2-BigGSHT-27B-2",
- "developer": "djuna",
- "inference_platform": "unknown",
- "id": "djuna/G2-BigGSHT-27B-2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7974430067775724
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.641474454273013
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2348942598187311
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36325503355704697
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40720833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45279255319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_G2-GSHT/b012b4a9-52d9-4b75-b80d-819579572f05.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_G2-GSHT/b012b4a9-52d9-4b75-b80d-819579572f05.json
deleted file mode 100644
index 166684ee1b01ceb21a18fe08f7002a253f84353d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_G2-GSHT/b012b4a9-52d9-4b75-b80d-819579572f05.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/djuna_G2-GSHT/1762652580.127527",
- "retrieved_timestamp": "1762652580.127528",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "djuna/G2-GSHT",
- "developer": "djuna",
- "inference_platform": "unknown",
- "id": "djuna/G2-GSHT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5630116978505919
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5269730491270207
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19259818731117825
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32550335570469796
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40057291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070146276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-ForStHS/2d9e083d-2c5e-4f42-ab27-6f0c150ee4db.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-ForStHS/2d9e083d-2c5e-4f42-ab27-6f0c150ee4db.json
deleted file mode 100644
index cd6dead2f9a89ffb5989b1398b0aef44e3848e3b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-ForStHS/2d9e083d-2c5e-4f42-ab27-6f0c150ee4db.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/djuna_L3.1-ForStHS/1762652580.128124",
- "retrieved_timestamp": "1762652580.128125",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "djuna/L3.1-ForStHS",
- "developer": "djuna",
- "inference_platform": "unknown",
- "id": "djuna/L3.1-ForStHS"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7813313120298586
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5202703381267152
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15030211480362538
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40264583333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37350398936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Promissum_Mane-8B-Della-1.5-calc/f738c507-0826-4d7a-a999-8a01274d8697.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Promissum_Mane-8B-Della-1.5-calc/f738c507-0826-4d7a-a999-8a01274d8697.json
deleted file mode 100644
index bc7e36f8fd5d63c782a8a38080f40b68a31968c4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Promissum_Mane-8B-Della-1.5-calc/f738c507-0826-4d7a-a999-8a01274d8697.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/djuna_L3.1-Promissum_Mane-8B-Della-1.5-calc/1762652580.1283488",
- "retrieved_timestamp": "1762652580.12835",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc",
- "developer": "djuna",
- "inference_platform": "unknown",
- "id": "djuna/L3.1-Promissum_Mane-8B-Della-1.5-calc"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7235291249440374
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5432920704935255
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16389728096676737
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145973154362416
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42528125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.390375664893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Promissum_Mane-8B-Della-calc/54d2c316-3c41-4d13-879d-a23c071a6885.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Promissum_Mane-8B-Della-calc/54d2c316-3c41-4d13-879d-a23c071a6885.json
deleted file mode 100644
index 52c4e55aa6920709deff5ed378a664963c3bda9d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Promissum_Mane-8B-Della-calc/54d2c316-3c41-4d13-879d-a23c071a6885.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/djuna_L3.1-Promissum_Mane-8B-Della-calc/1762652580.128573",
- "retrieved_timestamp": "1762652580.128574",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "djuna/L3.1-Promissum_Mane-8B-Della-calc",
- "developer": "djuna",
- "inference_platform": "unknown",
- "id": "djuna/L3.1-Promissum_Mane-8B-Della-calc"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.544152847777231
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.548587625935678
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18429003021148035
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4229895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3801529255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Purosani-2-8B/f1cc7f8d-72da-40ef-8cb1-f069cd0c052e.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Purosani-2-8B/f1cc7f8d-72da-40ef-8cb1-f069cd0c052e.json
deleted file mode 100644
index ca8de8972bde41f6227dde6b4c490809a48570c2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Purosani-2-8B/f1cc7f8d-72da-40ef-8cb1-f069cd0c052e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/djuna_L3.1-Purosani-2-8B/1762652580.128782",
- "retrieved_timestamp": "1762652580.128783",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "djuna/L3.1-Purosani-2-8B",
- "developer": "djuna",
- "inference_platform": "unknown",
- "id": "djuna/L3.1-Purosani-2-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4988153654525548
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5182122256069372
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11706948640483383
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38162499999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3751662234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Suze-Vume-calc/3a48a9ec-61a5-45fd-903a-de2ef90ef13e.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Suze-Vume-calc/3a48a9ec-61a5-45fd-903a-de2ef90ef13e.json
deleted file mode 100644
index ed3eadb3c6476a33c6d07cbf927aaf2f353c1c35..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_L3.1-Suze-Vume-calc/3a48a9ec-61a5-45fd-903a-de2ef90ef13e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/djuna_L3.1-Suze-Vume-calc/1762652580.128992",
- "retrieved_timestamp": "1762652580.128992",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "djuna/L3.1-Suze-Vume-calc",
- "developer": "djuna",
- "inference_platform": "unknown",
- "id": "djuna/L3.1-Suze-Vume-calc"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7296739318341999
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.516421105092519
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11404833836858005
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28187919463087246
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38429166666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35147938829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun-12B-2/7b384a2a-50c5-4c04-a9dd-5a9acefbd81f.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun-12B-2/7b384a2a-50c5-4c04-a9dd-5a9acefbd81f.json
deleted file mode 100644
index 15cc006f70a60e4f2d9eca7e62863b8a267151e2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun-12B-2/7b384a2a-50c5-4c04-a9dd-5a9acefbd81f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun-12B-2/1762652580.129499",
- "retrieved_timestamp": "1762652580.1295",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "djuna/MN-Chinofun-12B-2",
- "developer": "djuna",
- "inference_platform": "unknown",
- "id": "djuna/MN-Chinofun-12B-2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6170671595810228
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5036959998266032
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13066465256797583
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42683333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3615359042553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun-12B-3/32a4d80a-9d28-47f4-b68f-36e95a400bf2.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun-12B-3/32a4d80a-9d28-47f4-b68f-36e95a400bf2.json
deleted file mode 100644
index 8c1b940741195f0225dc52cf063e778bdfd7ad6c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun-12B-3/32a4d80a-9d28-47f4-b68f-36e95a400bf2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun-12B-3/1762652580.129836",
- "retrieved_timestamp": "1762652580.129837",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "djuna/MN-Chinofun-12B-3",
- "developer": "djuna",
- "inference_platform": "unknown",
- "id": "djuna/MN-Chinofun-12B-3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3052744495715812
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.53478574603334
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10045317220543806
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4197916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3026097074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun-12B-4/4f09e60c-e68a-426c-ac7e-f5e6755e14be.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun-12B-4/4f09e60c-e68a-426c-ac7e-f5e6755e14be.json
deleted file mode 100644
index a90ff54cc784823af88c862931f9bc04a48d4ea5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun-12B-4/4f09e60c-e68a-426c-ac7e-f5e6755e14be.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun-12B-4/1762652580.13009",
- "retrieved_timestamp": "1762652580.130091",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "djuna/MN-Chinofun-12B-4",
- "developer": "djuna",
- "inference_platform": "unknown",
- "id": "djuna/MN-Chinofun-12B-4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5404305021786637
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5347693369790583
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11178247734138973
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4306770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3497340425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun/023756a1-66cc-423a-803b-0d8b0f368bd2.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun/023756a1-66cc-423a-803b-0d8b0f368bd2.json
deleted file mode 100644
index c2b824b9b526698c61ddc10ec42dd89f0878df04..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_MN-Chinofun/023756a1-66cc-423a-803b-0d8b0f368bd2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/djuna_MN-Chinofun/1762652580.1291971",
- "retrieved_timestamp": "1762652580.1291971",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "djuna/MN-Chinofun",
- "developer": "djuna",
- "inference_platform": "unknown",
- "id": "djuna/MN-Chinofun"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6110220880596817
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49527033812671534
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13066465256797583
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40835416666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36028922872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_Q2.5-Partron-7B/b045b20a-cdbf-4495-89ae-b235ada2e9e0.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_Q2.5-Partron-7B/b045b20a-cdbf-4495-89ae-b235ada2e9e0.json
deleted file mode 100644
index bf2114f9a6266688be8fd42e2869869f08ab84f2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_Q2.5-Partron-7B/b045b20a-cdbf-4495-89ae-b235ada2e9e0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/djuna_Q2.5-Partron-7B/1762652580.130363",
- "retrieved_timestamp": "1762652580.130364",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "djuna/Q2.5-Partron-7B",
- "developer": "djuna",
- "inference_platform": "unknown",
- "id": "djuna/Q2.5-Partron-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7321218810533828
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5418474850726388
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4826283987915408
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41654166666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4282746010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_Q2.5-Veltha-14B-0.5/258520cb-360a-4629-be8e-e4ffca8a81b2.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_Q2.5-Veltha-14B-0.5/258520cb-360a-4629-be8e-e4ffca8a81b2.json
deleted file mode 100644
index f99e4a9ce14ef4b644779e91aa682f23e283237b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_Q2.5-Veltha-14B-0.5/258520cb-360a-4629-be8e-e4ffca8a81b2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/djuna_Q2.5-Veltha-14B-0.5/1762652580.13079",
- "retrieved_timestamp": "1762652580.130791",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "djuna/Q2.5-Veltha-14B-0.5",
- "developer": "djuna",
- "inference_platform": "unknown",
- "id": "djuna/Q2.5-Veltha-14B-0.5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7795826185631901
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6523026688308357
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43731117824773413
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36828859060402686
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43390625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5295046542553191
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/djuna/djuna_Q2.5-Veltha-14B/0a9560cd-d3e2-4d41-b83c-f321bcfc9c3c.json b/leaderboard_data/HFOpenLLMv2/djuna/djuna_Q2.5-Veltha-14B/0a9560cd-d3e2-4d41-b83c-f321bcfc9c3c.json
deleted file mode 100644
index 1b7da584c5836128234a01d30b0ed44493a3f73e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/djuna/djuna_Q2.5-Veltha-14B/0a9560cd-d3e2-4d41-b83c-f321bcfc9c3c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/djuna_Q2.5-Veltha-14B/1762652580.130576",
- "retrieved_timestamp": "1762652580.1305768",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "djuna/Q2.5-Veltha-14B",
- "developer": "djuna",
- "inference_platform": "unknown",
- "id": "djuna/Q2.5-Veltha-14B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8291666112581284
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.648421390292023
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4788519637462236
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35906040268456374
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41942708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5298371010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3-8B-Instruct/85472ae2-d5f0-4896-811b-d4217241bcef.json b/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3-8B-Instruct/85472ae2-d5f0-4896-811b-d4217241bcef.json
deleted file mode 100644
index 799da164efbd3cf978b52263f3cd58f8fff3f69a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3-8B-Instruct/85472ae2-d5f0-4896-811b-d4217241bcef.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3-8B-Instruct/1762652580.131744",
- "retrieved_timestamp": "1762652580.131744",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dnhkng/RYS-Llama-3-8B-Instruct",
- "developer": "dnhkng",
- "inference_platform": "unknown",
- "id": "dnhkng/RYS-Llama-3-8B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6957772044841022
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4808708123069005
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06873111782477341
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33834375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.355718085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3-Huge-Instruct/0e8dfce1-b0d3-4ba5-a3be-ba6f52421841.json b/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3-Huge-Instruct/0e8dfce1-b0d3-4ba5-a3be-ba6f52421841.json
deleted file mode 100644
index 431b46ae7241d28f69f44cdf54b3ed251108222b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3-Huge-Instruct/0e8dfce1-b0d3-4ba5-a3be-ba6f52421841.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3-Huge-Instruct/1762652580.1319628",
- "retrieved_timestamp": "1762652580.131964",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dnhkng/RYS-Llama-3-Huge-Instruct",
- "developer": "dnhkng",
- "inference_platform": "unknown",
- "id": "dnhkng/RYS-Llama-3-Huge-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7685917809190725
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6480872171360044
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22885196374622357
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4207604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.510970744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 99.646
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3-Large-Instruct/f9485436-6935-422f-9eb1-ee7faeb231d1.json b/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3-Large-Instruct/f9485436-6935-422f-9eb1-ee7faeb231d1.json
deleted file mode 100644
index 086883e9115a3a11d7748c8f2e93a9cc1b1089fc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3-Large-Instruct/f9485436-6935-422f-9eb1-ee7faeb231d1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3-Large-Instruct/1762652580.132239",
- "retrieved_timestamp": "1762652580.132241",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dnhkng/RYS-Llama-3-Large-Instruct",
- "developer": "dnhkng",
- "inference_platform": "unknown",
- "id": "dnhkng/RYS-Llama-3-Large-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8050616807847621
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.65252690724939
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23036253776435045
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41803125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5137134308510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 73.976
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3.1-8B-Instruct/62dab9bd-df83-4a0b-be94-0ddd981da6e4.json b/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3.1-8B-Instruct/62dab9bd-df83-4a0b-be94-0ddd981da6e4.json
deleted file mode 100644
index 8031673a6f3b0cc049111b1d8bd05a3c4e8f589e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Llama-3.1-8B-Instruct/62dab9bd-df83-4a0b-be94-0ddd981da6e4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Llama-3.1-8B-Instruct/1762652580.132753",
- "retrieved_timestamp": "1762652580.1327538",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dnhkng/RYS-Llama-3.1-8B-Instruct",
- "developer": "dnhkng",
- "inference_platform": "unknown",
- "id": "dnhkng/RYS-Llama-3.1-8B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7684920455502511
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5163645317446665
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13293051359516617
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3681041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36394614361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "?",
- "params_billions": 8.685
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Medium/ca1e127b-ded1-4015-85b9-be134c26644d.json b/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Medium/ca1e127b-ded1-4015-85b9-be134c26644d.json
deleted file mode 100644
index c65070ff874a4e0e3b4dd8ab6cb10923f62d4ade..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Medium/ca1e127b-ded1-4015-85b9-be134c26644d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Medium/1762652580.131469",
- "retrieved_timestamp": "1762652580.13147",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dnhkng/RYS-Medium",
- "developer": "dnhkng",
- "inference_platform": "unknown",
- "id": "dnhkng/RYS-Medium"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4406131287206833
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6284726872432828
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10800604229607251
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40692708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4325964095744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 18.731
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Phi-3-medium-4k-instruct/94f92919-36fb-4aed-8c0c-2bee0cd1d301.json b/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Phi-3-medium-4k-instruct/94f92919-36fb-4aed-8c0c-2bee0cd1d301.json
deleted file mode 100644
index 3f8b02d4c4d01c350319de2b1234e9f1bd576dcc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-Phi-3-medium-4k-instruct/94f92919-36fb-4aed-8c0c-2bee0cd1d301.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dnhkng_RYS-Phi-3-medium-4k-instruct/1762652580.133586",
- "retrieved_timestamp": "1762652580.133587",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dnhkng/RYS-Phi-3-medium-4k-instruct",
- "developer": "dnhkng",
- "inference_platform": "unknown",
- "id": "dnhkng/RYS-Phi-3-medium-4k-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4391392616036561
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6226313539198264
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1608761329305136
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3548657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42528125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.484624335106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 17.709
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-XLarge-base/1b0bb4ca-9553-4ddd-bf35-cab66685668d.json b/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-XLarge-base/1b0bb4ca-9553-4ddd-bf35-cab66685668d.json
deleted file mode 100644
index 133d4499adb35475aafe598cb86aabebaf6bb721..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-XLarge-base/1b0bb4ca-9553-4ddd-bf35-cab66685668d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dnhkng_RYS-XLarge-base/1762652580.134071",
- "retrieved_timestamp": "1762652580.134072",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dnhkng/RYS-XLarge-base",
- "developer": "dnhkng",
- "inference_platform": "unknown",
- "id": "dnhkng/RYS-XLarge-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7910233735377686
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7047291858548728
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37915407854984895
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37919463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4902708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5430518617021277
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 77.972
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-XLarge/a2a90b7e-f6db-408a-b5df-284d0b4a6353.json b/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-XLarge/a2a90b7e-f6db-408a-b5df-284d0b4a6353.json
deleted file mode 100644
index a390a50e0de3a43febab80a65d9ef31ff99081f0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-XLarge/a2a90b7e-f6db-408a-b5df-284d0b4a6353.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dnhkng_RYS-XLarge/1762652580.1338398",
- "retrieved_timestamp": "1762652580.1338408",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dnhkng/RYS-XLarge",
- "developer": "dnhkng",
- "inference_platform": "unknown",
- "id": "dnhkng/RYS-XLarge"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7995662619627034
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7050033079850099
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.425226586102719
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38422818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49696875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5428025265957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 77.965
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-XLarge2/6f344c50-fdf3-477e-9a76-558ed61fd509.json b/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-XLarge2/6f344c50-fdf3-477e-9a76-558ed61fd509.json
deleted file mode 100644
index 9b7340a3ad032df7d60072cd2696a89b00a4c7b0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dnhkng/dnhkng_RYS-XLarge2/6f344c50-fdf3-477e-9a76-558ed61fd509.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dnhkng_RYS-XLarge2/1762652580.1343",
- "retrieved_timestamp": "1762652580.134301",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dnhkng/RYS-XLarge2",
- "developer": "dnhkng",
- "inference_platform": "unknown",
- "id": "dnhkng/RYS-XLarge2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49019712141562166
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6573947106260754
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27492447129909364
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37416107382550334
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4508020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5378158244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 77.965
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dreamgen/dreamgen_WizardLM-2-7B/5ed2650d-d76f-49d6-915b-ac551129913e.json b/leaderboard_data/HFOpenLLMv2/dreamgen/dreamgen_WizardLM-2-7B/5ed2650d-d76f-49d6-915b-ac551129913e.json
deleted file mode 100644
index bc1329462ba3a7d85316baca05d18c6266da54bf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dreamgen/dreamgen_WizardLM-2-7B/5ed2650d-d76f-49d6-915b-ac551129913e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dreamgen_WizardLM-2-7B/1762652580.1345458",
- "retrieved_timestamp": "1762652580.134547",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dreamgen/WizardLM-2-7B",
- "developer": "dreamgen",
- "inference_platform": "unknown",
- "id": "dreamgen/WizardLM-2-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45829842595424586
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34867856163972016
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03323262839879154
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39409374999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2660405585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v1/c402fb6f-6e91-4e33-b847-87371373a6eb.json b/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v1/c402fb6f-6e91-4e33-b847-87371373a6eb.json
deleted file mode 100644
index 1d0c653f5d8aa79e064ba4bc470ca03b0e1fb10c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v1/c402fb6f-6e91-4e33-b847-87371373a6eb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v1/1762652580.134872",
- "retrieved_timestamp": "1762652580.134874",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dustinwloring1988/Reflexis-8b-chat-v1",
- "developer": "dustinwloring1988",
- "inference_platform": "unknown",
- "id": "dustinwloring1988/Reflexis-8b-chat-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3657750324694034
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4663596290293861
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11555891238670694
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25419463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3753958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3384308510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v2/6475a1f1-0c12-4ab3-89fc-cc5aa1d8145e.json b/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v2/6475a1f1-0c12-4ab3-89fc-cc5aa1d8145e.json
deleted file mode 100644
index 3e3a84976812c92ad1b36fbf4a34797707249b0b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v2/6475a1f1-0c12-4ab3-89fc-cc5aa1d8145e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v2/1762652580.135156",
- "retrieved_timestamp": "1762652580.135157",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dustinwloring1988/Reflexis-8b-chat-v2",
- "developer": "dustinwloring1988",
- "inference_platform": "unknown",
- "id": "dustinwloring1988/Reflexis-8b-chat-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3912042270065648
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47238018945807153
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1163141993957704
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3526354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3377659574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v3/5767ea0d-318c-4c65-9c96-890d27973302.json b/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v3/5767ea0d-318c-4c65-9c96-890d27973302.json
deleted file mode 100644
index 2cd71cf082e3557de357d5ab744551e8cef39fb1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v3/5767ea0d-318c-4c65-9c96-890d27973302.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v3/1762652580.1353788",
- "retrieved_timestamp": "1762652580.1353788",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dustinwloring1988/Reflexis-8b-chat-v3",
- "developer": "dustinwloring1988",
- "inference_platform": "unknown",
- "id": "dustinwloring1988/Reflexis-8b-chat-v3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.536733644507684
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4658310598309874
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12235649546827794
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2424496644295302
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35117708333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35480385638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v4/ad9e0902-3542-4994-ae42-4f3ef9f88ab1.json b/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v4/ad9e0902-3542-4994-ae42-4f3ef9f88ab1.json
deleted file mode 100644
index c07ced2a0369fafacae6840c46238267d9678fbc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v4/ad9e0902-3542-4994-ae42-4f3ef9f88ab1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v4/1762652580.135605",
- "retrieved_timestamp": "1762652580.135605",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dustinwloring1988/Reflexis-8b-chat-v4",
- "developer": "dustinwloring1988",
- "inference_platform": "unknown",
- "id": "dustinwloring1988/Reflexis-8b-chat-v4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4697890486132351
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46860140660011185
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1027190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23406040268456377
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33930208333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3390126329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v5/01c33f76-994a-4a1c-951d-88b34e471498.json b/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v5/01c33f76-994a-4a1c-951d-88b34e471498.json
deleted file mode 100644
index bbda0bc1b8dc1765b7fdb1faad34218d7e838433..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v5/01c33f76-994a-4a1c-951d-88b34e471498.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v5/1762652580.135817",
- "retrieved_timestamp": "1762652580.135818",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dustinwloring1988/Reflexis-8b-chat-v5",
- "developer": "dustinwloring1988",
- "inference_platform": "unknown",
- "id": "dustinwloring1988/Reflexis-8b-chat-v5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42375231053604434
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4781685533183147
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1216012084592145
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33536458333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3217253989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v6/65ce9e6f-cab9-4ccc-af89-de9be928529e.json b/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v6/65ce9e6f-cab9-4ccc-af89-de9be928529e.json
deleted file mode 100644
index 6d5af7c44195c96276e66868d6d9a10da4f8208d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v6/65ce9e6f-cab9-4ccc-af89-de9be928529e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v6/1762652580.136029",
- "retrieved_timestamp": "1762652580.13603",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dustinwloring1988/Reflexis-8b-chat-v6",
- "developer": "dustinwloring1988",
- "inference_platform": "unknown",
- "id": "dustinwloring1988/Reflexis-8b-chat-v6"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4938939790866014
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4809537068664902
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1299093655589124
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3753333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.347905585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v7/abadd81a-bd45-4eba-ae77-25190c751085.json b/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v7/abadd81a-bd45-4eba-ae77-25190c751085.json
deleted file mode 100644
index d5b8689e32dc90dd790c9c37e57ac31919dabdbe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dustinwloring1988/dustinwloring1988_Reflexis-8b-chat-v7/abadd81a-bd45-4eba-ae77-25190c751085.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dustinwloring1988_Reflexis-8b-chat-v7/1762652580.1362429",
- "retrieved_timestamp": "1762652580.136244",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dustinwloring1988/Reflexis-8b-chat-v7",
- "developer": "dustinwloring1988",
- "inference_platform": "unknown",
- "id": "dustinwloring1988/Reflexis-8b-chat-v7"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39804828964924177
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4809830787114964
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16314199395770393
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32215625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3642785904255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gemma-2-2b-id-instruct/73418e8c-ce10-4ea4-97f6-6f87c2be05a2.json b/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gemma-2-2b-id-instruct/73418e8c-ce10-4ea4-97f6-6f87c2be05a2.json
deleted file mode 100644
index 84a242d49f9116205299de9d51f3667d56b1464f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gemma-2-2b-id-instruct/73418e8c-ce10-4ea4-97f6-6f87c2be05a2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dwikitheduck_gemma-2-2b-id-instruct/1762652580.137409",
- "retrieved_timestamp": "1762652580.1374102",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dwikitheduck/gemma-2-2b-id-instruct",
- "developer": "dwikitheduck",
- "inference_platform": "unknown",
- "id": "dwikitheduck/gemma-2-2b-id-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38785644312646006
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39621721241423097
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.045317220543806644
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41542708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21733710106382978
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gen-inst-1/5117b75d-3060-4434-a40d-01c471563685.json b/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gen-inst-1/5117b75d-3060-4434-a40d-01c471563685.json
deleted file mode 100644
index 571bbba1d5f90ad9c2bdf564386385ceef2e0348..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gen-inst-1/5117b75d-3060-4434-a40d-01c471563685.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dwikitheduck_gen-inst-1/1762652580.1376698",
- "retrieved_timestamp": "1762652580.137671",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dwikitheduck/gen-inst-1",
- "developer": "dwikitheduck",
- "inference_platform": "unknown",
- "id": "dwikitheduck/gen-inst-1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7750114141588762
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6419926671215591
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4554380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3716442953020134
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42054166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5088929521276596
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gen-try1-notemp/5bd29754-7f93-42fb-ba9b-7b3a4315bd17.json b/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gen-try1-notemp/5bd29754-7f93-42fb-ba9b-7b3a4315bd17.json
deleted file mode 100644
index 0e1da429d3a09ce1c34d4e72bc6e8a265913d4bc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gen-try1-notemp/5bd29754-7f93-42fb-ba9b-7b3a4315bd17.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dwikitheduck_gen-try1-notemp/1762652580.13809",
- "retrieved_timestamp": "1762652580.138091",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dwikitheduck/gen-try1-notemp",
- "developer": "dwikitheduck",
- "inference_platform": "unknown",
- "id": "dwikitheduck/gen-try1-notemp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26270961050013963
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.626267088306491
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31797583081570996
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3540268456375839
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47141666666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5210272606382979
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gen-try1/8f00112d-767f-4ac5-ae1c-e37781cf7eec.json b/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gen-try1/8f00112d-767f-4ac5-ae1c-e37781cf7eec.json
deleted file mode 100644
index e7d8115ffb943649601d2233f6a6fb3afd3c50d6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dwikitheduck/dwikitheduck_gen-try1/8f00112d-767f-4ac5-ae1c-e37781cf7eec.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dwikitheduck_gen-try1/1762652580.137886",
- "retrieved_timestamp": "1762652580.137887",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dwikitheduck/gen-try1",
- "developer": "dwikitheduck",
- "inference_platform": "unknown",
- "id": "dwikitheduck/gen-try1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7522052598217175
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6358510933470735
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41012084592145015
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3414429530201342
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4415625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5110538563829787
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/dzakwan/dzakwan_dzakwan-MoE-4x7b-Beta/f4ceacae-0b81-44ac-8b9d-31d81e145bab.json b/leaderboard_data/HFOpenLLMv2/dzakwan/dzakwan_dzakwan-MoE-4x7b-Beta/f4ceacae-0b81-44ac-8b9d-31d81e145bab.json
deleted file mode 100644
index e3f7597bab3ec508ffd751ce26dec40b40800044..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/dzakwan/dzakwan_dzakwan-MoE-4x7b-Beta/f4ceacae-0b81-44ac-8b9d-31d81e145bab.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dzakwan_dzakwan-MoE-4x7b-Beta/1762652580.138297",
- "retrieved_timestamp": "1762652580.138298",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dzakwan/dzakwan-MoE-4x7b-Beta",
- "developer": "dzakwan",
- "inference_platform": "unknown",
- "id": "dzakwan/dzakwan-MoE-4x7b-Beta"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44426011870725235
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.514044131159397
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07779456193353475
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2860738255033557
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42673958333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3107546542553192
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 24.154
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_Falcon3-8B-Franken-Basestruct/1653400c-137e-4745-8676-eeaf39bbcc13.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_Falcon3-8B-Franken-Basestruct/1653400c-137e-4745-8676-eeaf39bbcc13.json
deleted file mode 100644
index afd65e0fbc73fa8fa3f01ce0234aafb7a557a858..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_Falcon3-8B-Franken-Basestruct/1653400c-137e-4745-8676-eeaf39bbcc13.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_Falcon3-8B-Franken-Basestruct/1762652580.138562",
- "retrieved_timestamp": "1762652580.1385632",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/Falcon3-8B-Franken-Basestruct",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/Falcon3-8B-Franken-Basestruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17148499315150467
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5462828074770284
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34060402684563756
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3554895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3946974734042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.406
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_Falcon3-MoE-2x7B-Insruct/6b208d1e-96f1-4b72-8d31-6c6e43c42111.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_Falcon3-MoE-2x7B-Insruct/6b208d1e-96f1-4b72-8d31-6c6e43c42111.json
deleted file mode 100644
index 6206b3a6c6e6b27c20c7a78f7b16f9b543b359fc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_Falcon3-MoE-2x7B-Insruct/6b208d1e-96f1-4b72-8d31-6c6e43c42111.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_Falcon3-MoE-2x7B-Insruct/1762652580.1388721",
- "retrieved_timestamp": "1762652580.138873",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/Falcon3-MoE-2x7B-Insruct",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/Falcon3-MoE-2x7B-Insruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7642954028643998
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.564789641564995
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4123867069486405
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4840416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40949135638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 13.401
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_SoRu-0009/d45e7b32-f09d-4185-ac78-d0eb7a4d3823.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_SoRu-0009/d45e7b32-f09d-4185-ac78-d0eb7a4d3823.json
deleted file mode 100644
index 67da085439791f3e822af1645eeea04f3d10d97c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_SoRu-0009/d45e7b32-f09d-4185-ac78-d0eb7a4d3823.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_SoRu-0009/1762652580.1407459",
- "retrieved_timestamp": "1762652580.140747",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/SoRu-0009",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/SoRu-0009"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25818827378023645
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3149981683579724
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.021148036253776436
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3369479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12391954787234043
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_della-70b-test-v1/d9f6c1e9-84be-4666-b64f-5da37cf98202.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_della-70b-test-v1/d9f6c1e9-84be-4666-b64f-5da37cf98202.json
deleted file mode 100644
index bf0d712f769293026c1fd499cea750da5978284e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_della-70b-test-v1/d9f6c1e9-84be-4666-b64f-5da37cf98202.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_della-70b-test-v1/1762652580.141174",
- "retrieved_timestamp": "1762652580.141175",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/della-70b-test-v1",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/della-70b-test-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49786566310722213
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3029452113782393
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.009818731117824773
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2525167785234899
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45545833333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1574966755319149
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_falcon3-ultraset/e2291d7c-7627-484e-a0c1-1857c642be2b.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_falcon3-ultraset/e2291d7c-7627-484e-a0c1-1857c642be2b.json
deleted file mode 100644
index 5844410db6766d6e252649a616ce1b0b9e060df5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_falcon3-ultraset/e2291d7c-7627-484e-a0c1-1857c642be2b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_falcon3-ultraset/1762652580.1413918",
- "retrieved_timestamp": "1762652580.141393",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/falcon3-ultraset",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/falcon3-ultraset"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7135123694020753
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5583684420918801
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2122356495468278
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33221476510067116
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48531250000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.398188164893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 7.456
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fd-lora-merged-16x32/4d00474d-97e6-4384-82f7-956b2e7268e9.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fd-lora-merged-16x32/4d00474d-97e6-4384-82f7-956b2e7268e9.json
deleted file mode 100644
index 00b66f4481b3cc7777324050b0732979d6a58c4d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fd-lora-merged-16x32/4d00474d-97e6-4384-82f7-956b2e7268e9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_fd-lora-merged-16x32/1762652580.141611",
- "retrieved_timestamp": "1762652580.141612",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/fd-lora-merged-16x32",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/fd-lora-merged-16x32"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3480897352358409
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3307564619842368
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17069486404833836
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35142708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12051196808510638
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.776
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fd-lora-merged-64x128/6474672b-7728-4ab5-8fdf-749e996272a2.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fd-lora-merged-64x128/6474672b-7728-4ab5-8fdf-749e996272a2.json
deleted file mode 100644
index 28ae169b101aa65e92bf45774c79fcbaf8139f67..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fd-lora-merged-64x128/6474672b-7728-4ab5-8fdf-749e996272a2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_fd-lora-merged-64x128/1762652580.14183",
- "retrieved_timestamp": "1762652580.141831",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/fd-lora-merged-64x128",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/fd-lora-merged-64x128"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3281060918363276
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33447107385638297
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18731117824773413
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2550335570469799
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3368229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15367353723404256
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fp4-14b-it-v1/31618256-7ca8-4a3c-bfbf-4397bf2cf339.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fp4-14b-it-v1/31618256-7ca8-4a3c-bfbf-4397bf2cf339.json
deleted file mode 100644
index 1027f18540a3c1a02e2513aa438d3f888f70f2ae..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fp4-14b-it-v1/31618256-7ca8-4a3c-bfbf-4397bf2cf339.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_fp4-14b-it-v1/1762652580.1420429",
- "retrieved_timestamp": "1762652580.1420438",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/fp4-14b-it-v1",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/fp4-14b-it-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25346746632269046
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5739715511094247
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04078549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35948958333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4204621010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fp4-14b-v1-fix/37d01a2d-f8ca-46a3-a4b7-3fa725b4023b.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fp4-14b-v1-fix/37d01a2d-f8ca-46a3-a4b7-3fa725b4023b.json
deleted file mode 100644
index 48602e406b9dea5a8bc2054d38dd46ac3b8ede56..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fp4-14b-v1-fix/37d01a2d-f8ca-46a3-a4b7-3fa725b4023b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_fp4-14b-v1-fix/1762652580.142252",
- "retrieved_timestamp": "1762652580.1422532",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/fp4-14b-v1-fix",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/fp4-14b-v1-fix"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6741700909143296
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6817274121032688
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4206948640483384
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3540268456375839
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4531875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5353224734042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 14.66
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fq2.5-7b-it-normalize_false/a5004f95-0854-40d2-8a71-004875544499.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fq2.5-7b-it-normalize_false/a5004f95-0854-40d2-8a71-004875544499.json
deleted file mode 100644
index cfd4a01d39f4ac61fa965ce18623f5f83ec3bea1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fq2.5-7b-it-normalize_false/a5004f95-0854-40d2-8a71-004875544499.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_fq2.5-7b-it-normalize_false/1762652580.142459",
- "retrieved_timestamp": "1762652580.1424599",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/fq2.5-7b-it-normalize_false",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/fq2.5-7b-it-normalize_false"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7399156460413925
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.551986272150289
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4622356495468278
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46115625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44132313829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fq2.5-7b-it-normalize_true/d0d8274c-7d05-4166-a510-487cb294135e.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fq2.5-7b-it-normalize_true/d0d8274c-7d05-4166-a510-487cb294135e.json
deleted file mode 100644
index b5e9bfbb83b7a34915ae30ef9f08a136acbac46d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_fq2.5-7b-it-normalize_true/d0d8274c-7d05-4166-a510-487cb294135e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_fq2.5-7b-it-normalize_true/1762652580.1426702",
- "retrieved_timestamp": "1762652580.142671",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/fq2.5-7b-it-normalize_true",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/fq2.5-7b-it-normalize_true"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7399156460413925
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.551986272150289
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4622356495468278
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46115625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44132313829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_mllama-3.1-8b-instruct/40016b83-0730-4e67-b7e9-3b1d29d9d1be.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_mllama-3.1-8b-instruct/40016b83-0730-4e67-b7e9-3b1d29d9d1be.json
deleted file mode 100644
index 98d813390676b81f83f384ce6320b8c46494febd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_mllama-3.1-8b-instruct/40016b83-0730-4e67-b7e9-3b1d29d9d1be.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_mllama-3.1-8b-instruct/1762652580.143588",
- "retrieved_timestamp": "1762652580.143589",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/mllama-3.1-8b-instruct",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/mllama-3.1-8b-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3457913890698901
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47176616480333583
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3776435045317221
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.338
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533244680851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_moremerge-upscaled/5c465aeb-c6be-4a22-9cf0-3d9c2558ba39.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_moremerge-upscaled/5c465aeb-c6be-4a22-9cf0-3d9c2558ba39.json
deleted file mode 100644
index de2e368846a6f4e75821a11f10a009b4ea2a9128..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_moremerge-upscaled/5c465aeb-c6be-4a22-9cf0-3d9c2558ba39.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_moremerge-upscaled/1762652580.144358",
- "retrieved_timestamp": "1762652580.1443589",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/moremerge-upscaled",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/moremerge-upscaled"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1978882697908217
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26977370070980244
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24664429530201343
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35930208333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10413896276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 8.545
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_moremerge/38cf2a56-ed33-4f7e-94aa-bf4f15a5a53c.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_moremerge/38cf2a56-ed33-4f7e-94aa-bf4f15a5a53c.json
deleted file mode 100644
index e1a75e5d147d6d2bd627cc445ac9e6c7e7148d4d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_moremerge/38cf2a56-ed33-4f7e-94aa-bf4f15a5a53c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_moremerge/1762652580.1440692",
- "retrieved_timestamp": "1762652580.14407",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/moremerge",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/moremerge"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20190982149585324
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28684447696551024
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35657291666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10654920212765957
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_rmoe-v1/e58aecba-3254-426d-aac2-05a32c3cbdab.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_rmoe-v1/e58aecba-3254-426d-aac2-05a32c3cbdab.json
deleted file mode 100644
index aec0041ea3019a75590f7e4da0d4003fd84a2ff1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_rmoe-v1/e58aecba-3254-426d-aac2-05a32c3cbdab.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_rmoe-v1/1762652580.1453388",
- "retrieved_timestamp": "1762652580.14534",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/rmoe-v1",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/rmoe-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26500795666609045
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29292907133609175
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0015105740181268882
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36634374999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1124501329787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2MoeForCausalLM",
- "params_billions": 11.026
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_rufalcon3-3b-it/8f4336f8-1fdb-4a3d-8b9a-2e7c5e156f07.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_rufalcon3-3b-it/8f4336f8-1fdb-4a3d-8b9a-2e7c5e156f07.json
deleted file mode 100644
index da62d6188320f38489d920919804f1011c15b0be..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_rufalcon3-3b-it/8f4336f8-1fdb-4a3d-8b9a-2e7c5e156f07.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_rufalcon3-3b-it/1762652580.14555",
- "retrieved_timestamp": "1762652580.14555",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/rufalcon3-3b-it",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/rufalcon3-3b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5942111375594533
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41554222543957625
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1782477341389728
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38953124999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2347905585106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.228
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_testq-32b/d5acc9ed-9fd1-411f-a85c-e790521e7fe4.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_testq-32b/d5acc9ed-9fd1-411f-a85c-e790521e7fe4.json
deleted file mode 100644
index afa7e4db708ec26c30e363693d79711eca128c4d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_testq-32b/d5acc9ed-9fd1-411f-a85c-e790521e7fe4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_testq-32b/1762652580.145958",
- "retrieved_timestamp": "1762652580.145958",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/testq-32b",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/testq-32b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18759668789921852
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2876549792486152
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0030211480362537764
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25419463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3714583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11660571808510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 56.165
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_tmoe-v2/0a84406f-a970-4a03-8d2f-c82a8bbd3872.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_tmoe-v2/0a84406f-a970-4a03-8d2f-c82a8bbd3872.json
deleted file mode 100644
index 3550073e229135bfe52e389016317b9e7b338dbf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_tmoe-v2/0a84406f-a970-4a03-8d2f-c82a8bbd3872.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_tmoe-v2/1762652580.146366",
- "retrieved_timestamp": "1762652580.146367",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/tmoe-v2",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/tmoe-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19026959578363187
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2896740649804915
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0022658610271903325
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2634228187919463
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4150833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11003989361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2MoeForCausalLM",
- "params_billions": 11.026
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_tmoe/0a160c2d-06ed-43c0-8705-bd76e47c093a.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_tmoe/0a160c2d-06ed-43c0-8705-bd76e47c093a.json
deleted file mode 100644
index 36945403771a858dc6b767d305dbbe19214390e3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_tmoe/0a160c2d-06ed-43c0-8705-bd76e47c093a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_tmoe/1762652580.1461592",
- "retrieved_timestamp": "1762652580.1461592",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/tmoe",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/tmoe"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11930234001338672
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30728601408520645
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0075528700906344415
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2231543624161074
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36990624999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11909906914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2MoeForCausalLM",
- "params_billions": 11.026
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_trd-7b-it/3bd7f3c1-772a-45fa-9d71-a6e3dff3b54f.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_trd-7b-it/3bd7f3c1-772a-45fa-9d71-a6e3dff3b54f.json
deleted file mode 100644
index 7a5e5975b66eb72112e8ed624eb83502a9c72e40..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_trd-7b-it/3bd7f3c1-772a-45fa-9d71-a6e3dff3b54f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_trd-7b-it/1762652580.146566",
- "retrieved_timestamp": "1762652580.1465669",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/trd-7b-it",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/trd-7b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21847143357402804
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2990238931062931
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03172205438066465
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3794270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11785239361702128
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.613
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_ud-14b/7e7ffbef-c8d4-47ff-9ae6-7f0701e9e192.json b/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_ud-14b/7e7ffbef-c8d4-47ff-9ae6-7f0701e9e192.json
deleted file mode 100644
index 73198648abe7cb5c3773b68b1d3ab2e644ea387a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ehristoforu/ehristoforu_ud-14b/7e7ffbef-c8d4-47ff-9ae6-7f0701e9e192.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_ud-14b/1762652580.146786",
- "retrieved_timestamp": "1762652580.146786",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/ud-14b",
- "developer": "ehristoforu",
- "inference_platform": "unknown",
- "id": "ehristoforu/ud-14b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4235273518708139
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3323819044961654
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1903323262839879
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23741610738255034
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43942708333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24152260638297873
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.766
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/elinas/elinas_Chronos-Gold-12B-1.0/4705d82c-514c-48a1-8f87-4d2b8f9aff6b.json b/leaderboard_data/HFOpenLLMv2/elinas/elinas_Chronos-Gold-12B-1.0/4705d82c-514c-48a1-8f87-4d2b8f9aff6b.json
deleted file mode 100644
index de8bcee6d77aa81e2d1a4c1e963af67c36892e9a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/elinas/elinas_Chronos-Gold-12B-1.0/4705d82c-514c-48a1-8f87-4d2b8f9aff6b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/elinas_Chronos-Gold-12B-1.0/1762652580.1470149",
- "retrieved_timestamp": "1762652580.147016",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "elinas/Chronos-Gold-12B-1.0",
- "developer": "elinas",
- "inference_platform": "unknown",
- "id": "elinas/Chronos-Gold-12B-1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3165656014929277
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5514664110708439
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06948640483383686
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179530201342282
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47398958333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.351811835106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/euclaise/euclaise_ReMask-3B/a905005d-85fa-44c9-848b-286f9100bab7.json b/leaderboard_data/HFOpenLLMv2/euclaise/euclaise_ReMask-3B/a905005d-85fa-44c9-848b-286f9100bab7.json
deleted file mode 100644
index 360ef38facaec3bafbb311a6c67b7f18a1ab47e3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/euclaise/euclaise_ReMask-3B/a905005d-85fa-44c9-848b-286f9100bab7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/euclaise_ReMask-3B/1762652580.14753",
- "retrieved_timestamp": "1762652580.147531",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "euclaise/ReMask-3B",
- "developer": "euclaise",
- "inference_platform": "unknown",
- "id": "euclaise/ReMask-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2419269759792905
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3516779692917367
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.019637462235649546
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33409375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13572140957446807
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "StableLmForCausalLM",
- "params_billions": 2.795
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/eworojoshua/eworojoshua_vas-01/f02ca364-4bf8-4f00-aecc-492ac1f0817a.json b/leaderboard_data/HFOpenLLMv2/eworojoshua/eworojoshua_vas-01/f02ca364-4bf8-4f00-aecc-492ac1f0817a.json
deleted file mode 100644
index f3b619c60979746bf5095e225d5cfd4bea42032a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/eworojoshua/eworojoshua_vas-01/f02ca364-4bf8-4f00-aecc-492ac1f0817a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/eworojoshua_vas-01/1762652580.1477718",
- "retrieved_timestamp": "1762652580.147773",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "eworojoshua/vas-01",
- "developer": "eworojoshua",
- "inference_platform": "unknown",
- "id": "eworojoshua/vas-01"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7612479332615238
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5417819433732887
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4735649546827795
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44323958333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4347573138297872
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_Thinker-Llama-3.2-3B-Instruct-Reasoning/8bdc63c5-2ed3-4738-8a5c-6b90ba969f99.json b/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_Thinker-Llama-3.2-3B-Instruct-Reasoning/8bdc63c5-2ed3-4738-8a5c-6b90ba969f99.json
deleted file mode 100644
index 9b70b31383de8b39c99500c6ed8a4197e6729a05..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_Thinker-Llama-3.2-3B-Instruct-Reasoning/8bdc63c5-2ed3-4738-8a5c-6b90ba969f99.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ewre324_Thinker-Llama-3.2-3B-Instruct-Reasoning/1762652580.148031",
- "retrieved_timestamp": "1762652580.148032",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning",
- "developer": "ewre324",
- "inference_platform": "unknown",
- "id": "ewre324/Thinker-Llama-3.2-3B-Instruct-Reasoning"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44388555698878973
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4273125047156003
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08459214501510574
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27684563758389263
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36553125000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2886469414893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.213
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_Thinker-Qwen2.5-0.5B-Instruct-Reasoning/fe29c3e7-463b-45a1-8377-97e7c7f21874.json b/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_Thinker-Qwen2.5-0.5B-Instruct-Reasoning/fe29c3e7-463b-45a1-8377-97e7c7f21874.json
deleted file mode 100644
index 7b4e0244c15cf9039587a5036d2c72d7d1e5be1b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_Thinker-Qwen2.5-0.5B-Instruct-Reasoning/fe29c3e7-463b-45a1-8377-97e7c7f21874.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ewre324_Thinker-Qwen2.5-0.5B-Instruct-Reasoning/1762652580.148299",
- "retrieved_timestamp": "1762652580.1483",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning",
- "developer": "ewre324",
- "inference_platform": "unknown",
- "id": "ewre324/Thinker-Qwen2.5-0.5B-Instruct-Reasoning"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2476473534665798
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3292122979013761
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.028700906344410877
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33821875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16472739361702127
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 0.494
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_Thinker-SmolLM2-135M-Instruct-Reasoning/5a03703c-6934-437c-aaca-2acfdd4ca629.json b/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_Thinker-SmolLM2-135M-Instruct-Reasoning/5a03703c-6934-437c-aaca-2acfdd4ca629.json
deleted file mode 100644
index 3a94014a97ba8ce0462d38647eb345d3982a18ce..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_Thinker-SmolLM2-135M-Instruct-Reasoning/5a03703c-6934-437c-aaca-2acfdd4ca629.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ewre324_Thinker-SmolLM2-135M-Instruct-Reasoning/1762652580.148509",
- "retrieved_timestamp": "1762652580.14851",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning",
- "developer": "ewre324",
- "inference_platform": "unknown",
- "id": "ewre324/Thinker-SmolLM2-135M-Instruct-Reasoning"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25836336476105626
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3071349750892843
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.00906344410876133
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2525167785234899
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.366125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.109375
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_ewre324-R1-SmolLM2-135M-Distill/6429c440-4d89-4d31-919c-63cde25ba99f.json b/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_ewre324-R1-SmolLM2-135M-Distill/6429c440-4d89-4d31-919c-63cde25ba99f.json
deleted file mode 100644
index 2f4e88c8b9bb4a862422ab815dae98b74083aa49..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ewre324/ewre324_ewre324-R1-SmolLM2-135M-Distill/6429c440-4d89-4d31-919c-63cde25ba99f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ewre324_ewre324-R1-SmolLM2-135M-Distill/1762652580.148724",
- "retrieved_timestamp": "1762652580.148725",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ewre324/ewre324-R1-SmolLM2-135M-Distill",
- "developer": "ewre324",
- "inference_platform": "unknown",
- "id": "ewre324/ewre324-R1-SmolLM2-135M-Distill"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16489026893088118
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3041695757290421
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01283987915407855
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3409166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11336436170212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.135
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/experiment-llm/experiment-llm_exp-3-q-r/7d72dcb1-bc5d-41bf-b333-c21e67b0acd2.json b/leaderboard_data/HFOpenLLMv2/experiment-llm/experiment-llm_exp-3-q-r/7d72dcb1-bc5d-41bf-b333-c21e67b0acd2.json
deleted file mode 100644
index bb964112887a2dca9bce48ee2ec476575a4bd4c8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/experiment-llm/experiment-llm_exp-3-q-r/7d72dcb1-bc5d-41bf-b333-c21e67b0acd2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/experiment-llm_exp-3-q-r/1762652580.148931",
- "retrieved_timestamp": "1762652580.148932",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "experiment-llm/exp-3-q-r",
- "developer": "experiment-llm",
- "inference_platform": "unknown",
- "id": "experiment-llm/exp-3-q-r"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6035785050333116
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5397159253811645
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27870090634441086
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43154166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43159906914893614
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/facebook/facebook_opt-1.3b/8675526d-af0b-4bf2-b143-123249371076.json b/leaderboard_data/HFOpenLLMv2/facebook/facebook_opt-1.3b/8675526d-af0b-4bf2-b143-123249371076.json
deleted file mode 100644
index 3de442eca1b31e205dc4408e6c2f49ce057ba123..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/facebook/facebook_opt-1.3b/8675526d-af0b-4bf2-b143-123249371076.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/facebook_opt-1.3b/1762652580.14919",
- "retrieved_timestamp": "1762652580.14919",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "facebook/opt-1.3b",
- "developer": "facebook",
- "inference_platform": "unknown",
- "id": "facebook/opt-1.3b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23832985367713222
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3093947052760125
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.00906344410876133
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2424496644295302
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.342
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11070478723404255
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "OPTForCausalLM",
- "params_billions": 1.3
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/facebook/facebook_opt-30b/1883ddb6-e4cc-4935-81ba-af30af1537e9.json b/leaderboard_data/HFOpenLLMv2/facebook/facebook_opt-30b/1883ddb6-e4cc-4935-81ba-af30af1537e9.json
deleted file mode 100644
index 151319769bc718d62460aab8df698b83c0a451d2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/facebook/facebook_opt-30b/1883ddb6-e4cc-4935-81ba-af30af1537e9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/facebook_opt-30b/1762652580.14943",
- "retrieved_timestamp": "1762652580.149431",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "facebook/opt-30b",
- "developer": "facebook",
- "inference_platform": "unknown",
- "id": "facebook/opt-30b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2452991396162183
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30703447525623373
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.010574018126888218
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36041666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1163563829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "OPTForCausalLM",
- "params_billions": 30.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Llama-3-8B-Instruct-MopeyMule/f5bfa461-15bf-4e32-8471-74f456c62fd9.json b/leaderboard_data/HFOpenLLMv2/failspy/failspy_Llama-3-8B-Instruct-MopeyMule/f5bfa461-15bf-4e32-8471-74f456c62fd9.json
deleted file mode 100644
index 9cc143c153699c7749d2bd9cd4cd420347d4b379..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Llama-3-8B-Instruct-MopeyMule/f5bfa461-15bf-4e32-8471-74f456c62fd9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/failspy_Llama-3-8B-Instruct-MopeyMule/1762652580.1496441",
- "retrieved_timestamp": "1762652580.1496441",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "failspy/Llama-3-8B-Instruct-MopeyMule",
- "developer": "failspy",
- "inference_platform": "unknown",
- "id": "failspy/Llama-3-8B-Instruct-MopeyMule"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6750444376476638
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.383874490132152
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.019637462235649546
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23909395973154363
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35130208333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17644614361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Llama-3-8B-Instruct-abliterated/8aa6c90e-a6ee-4dfe-8bf4-b5d256be9cd6.json b/leaderboard_data/HFOpenLLMv2/failspy/failspy_Llama-3-8B-Instruct-abliterated/8aa6c90e-a6ee-4dfe-8bf4-b5d256be9cd6.json
deleted file mode 100644
index 43304db70c6a13fbddfa06e9c964ea3483863972..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Llama-3-8B-Instruct-abliterated/8aa6c90e-a6ee-4dfe-8bf4-b5d256be9cd6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/failspy_Llama-3-8B-Instruct-abliterated/1762652580.1499012",
- "retrieved_timestamp": "1762652580.149902",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "failspy/Llama-3-8B-Instruct-abliterated",
- "developer": "failspy",
- "inference_platform": "unknown",
- "id": "failspy/Llama-3-8B-Instruct-abliterated"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5908888416069362
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4353752684977051
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03851963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41158333333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2741855053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Meta-Llama-3-70B-Instruct-abliterated-v3.5/e0329607-d832-4252-ad71-81e8a8c4bb31.json b/leaderboard_data/HFOpenLLMv2/failspy/failspy_Meta-Llama-3-70B-Instruct-abliterated-v3.5/e0329607-d832-4252-ad71-81e8a8c4bb31.json
deleted file mode 100644
index 31f071b311f83c827bf9e47b76d681ab208cc650..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Meta-Llama-3-70B-Instruct-abliterated-v3.5/e0329607-d832-4252-ad71-81e8a8c4bb31.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/failspy_Meta-Llama-3-70B-Instruct-abliterated-v3.5/1762652580.1501682",
- "retrieved_timestamp": "1762652580.1501691",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5",
- "developer": "failspy",
- "inference_platform": "unknown",
- "id": "failspy/Meta-Llama-3-70B-Instruct-abliterated-v3.5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7746867201248244
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.574710022890038
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1283987915407855
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39818749999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44522938829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Meta-Llama-3-8B-Instruct-abliterated-v3/c598dbff-4ab5-4405-b75d-13571ae3d862.json b/leaderboard_data/HFOpenLLMv2/failspy/failspy_Meta-Llama-3-8B-Instruct-abliterated-v3/c598dbff-4ab5-4405-b75d-13571ae3d862.json
deleted file mode 100644
index 20fce262096ffb79203f0e63c8312c92f6a30d1e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Meta-Llama-3-8B-Instruct-abliterated-v3/c598dbff-4ab5-4405-b75d-13571ae3d862.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/failspy_Meta-Llama-3-8B-Instruct-abliterated-v3/1762652580.150389",
- "retrieved_timestamp": "1762652580.15039",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3",
- "developer": "failspy",
- "inference_platform": "unknown",
- "id": "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7244533393617822
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4924562150856957
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09592145015105741
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36218749999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3653590425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Phi-3-medium-4k-instruct-abliterated-v3/264bc4a6-f0ad-4eef-a519-6d97f8f6ab91.json b/leaderboard_data/HFOpenLLMv2/failspy/failspy_Phi-3-medium-4k-instruct-abliterated-v3/264bc4a6-f0ad-4eef-a519-6d97f8f6ab91.json
deleted file mode 100644
index f0b20515798b1dba02ad13290d5629b97c07fe87..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/failspy/failspy_Phi-3-medium-4k-instruct-abliterated-v3/264bc4a6-f0ad-4eef-a519-6d97f8f6ab91.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/failspy_Phi-3-medium-4k-instruct-abliterated-v3/1762652580.1505978",
- "retrieved_timestamp": "1762652580.150599",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "failspy/Phi-3-medium-4k-instruct-abliterated-v3",
- "developer": "failspy",
- "inference_platform": "unknown",
- "id": "failspy/Phi-3-medium-4k-instruct-abliterated-v3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6319299458769398
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6304799176474429
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1593655589123867
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31711409395973156
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4604166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4399933510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Phi3ForCausalLM",
- "params_billions": 13.96
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/failspy/failspy_llama-3-70B-Instruct-abliterated/f31f7ad3-9018-4891-be05-12787728904c.json b/leaderboard_data/HFOpenLLMv2/failspy/failspy_llama-3-70B-Instruct-abliterated/f31f7ad3-9018-4891-be05-12787728904c.json
deleted file mode 100644
index 7d7a8c725742421d3f29c04fe526842ab264ae7d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/failspy/failspy_llama-3-70B-Instruct-abliterated/f31f7ad3-9018-4891-be05-12787728904c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/failspy_llama-3-70B-Instruct-abliterated/1762652580.1508029",
- "retrieved_timestamp": "1762652580.150804",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "failspy/llama-3-70B-Instruct-abliterated",
- "developer": "failspy",
- "inference_platform": "unknown",
- "id": "failspy/llama-3-70B-Instruct-abliterated"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8023389052159382
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6464853840398571
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.243202416918429
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4127604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5145445478723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 70.554
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_TheBeagle-v2beta-32B-MGS/63bdc7e2-6518-4da4-81f4-74aab25f7a5e.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_TheBeagle-v2beta-32B-MGS/63bdc7e2-6518-4da4-81f4-74aab25f7a5e.json
deleted file mode 100644
index e23f480a5cc0240691efb418e9241f58c28639fa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_TheBeagle-v2beta-32B-MGS/63bdc7e2-6518-4da4-81f4-74aab25f7a5e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/fblgit_TheBeagle-v2beta-32B-MGS/1762652580.1510022",
- "retrieved_timestamp": "1762652580.151003",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "fblgit/TheBeagle-v2beta-32B-MGS",
- "developer": "fblgit",
- "inference_platform": "unknown",
- "id": "fblgit/TheBeagle-v2beta-32B-MGS"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.518074265171966
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7032634749563558
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4947129909365559
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3825503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.50075
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5915059840425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_TheBeagle-v2beta-32B-MGS/8338dd8a-88c2-42f8-9d67-13b852e3c0ea.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_TheBeagle-v2beta-32B-MGS/8338dd8a-88c2-42f8-9d67-13b852e3c0ea.json
deleted file mode 100644
index bed0fee091f745a0bf5862ebc224e3b1d0d5f640..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_TheBeagle-v2beta-32B-MGS/8338dd8a-88c2-42f8-9d67-13b852e3c0ea.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/fblgit_TheBeagle-v2beta-32B-MGS/1762652580.151249",
- "retrieved_timestamp": "1762652580.151249",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "fblgit/TheBeagle-v2beta-32B-MGS",
- "developer": "fblgit",
- "inference_platform": "unknown",
- "id": "fblgit/TheBeagle-v2beta-32B-MGS"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4503051902285935
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.703542441088263
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3942598187311178
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.401006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5021145833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5910904255319149
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_UNA-SimpleSmaug-34b-v1beta/f98b051e-0984-423d-89c0-352368168d75.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_UNA-SimpleSmaug-34b-v1beta/f98b051e-0984-423d-89c0-352368168d75.json
deleted file mode 100644
index 2d03947a2da055eb1dfac6a2eb1a948392955994..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_UNA-SimpleSmaug-34b-v1beta/f98b051e-0984-423d-89c0-352368168d75.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/fblgit_UNA-SimpleSmaug-34b-v1beta/1762652580.151433",
- "retrieved_timestamp": "1762652580.151433",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "fblgit/UNA-SimpleSmaug-34b-v1beta",
- "developer": "fblgit",
- "inference_platform": "unknown",
- "id": "fblgit/UNA-SimpleSmaug-34b-v1beta"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45562551806983254
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5286654104993475
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07175226586102719
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31711409395973156
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4255625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4539561170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 34.389
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_UNA-TheBeagle-7b-v1/454be483-8a45-4bea-a370-5f5a74a924ea.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_UNA-TheBeagle-7b-v1/454be483-8a45-4bea-a370-5f5a74a924ea.json
deleted file mode 100644
index dc7f95417adbf998d282d04156bf9a5f2de878a4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_UNA-TheBeagle-7b-v1/454be483-8a45-4bea-a370-5f5a74a924ea.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/fblgit_UNA-TheBeagle-7b-v1/1762652580.151644",
- "retrieved_timestamp": "1762652580.151645",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "fblgit/UNA-TheBeagle-7b-v1",
- "developer": "fblgit",
- "inference_platform": "unknown",
- "id": "fblgit/UNA-TheBeagle-7b-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36887236975669
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5028691097522866
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0770392749244713
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4564375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3019448138297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_UNA-ThePitbull-21.4B-v2/afdf8e40-d87a-4a9c-93a7-a65fe2ae732a.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_UNA-ThePitbull-21.4B-v2/afdf8e40-d87a-4a9c-93a7-a65fe2ae732a.json
deleted file mode 100644
index d9e243c3e281fb554af852bffa19af8209b7954a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_UNA-ThePitbull-21.4B-v2/afdf8e40-d87a-4a9c-93a7-a65fe2ae732a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/fblgit_UNA-ThePitbull-21.4B-v2/1762652580.151847",
- "retrieved_timestamp": "1762652580.151847",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "fblgit/UNA-ThePitbull-21.4B-v2",
- "developer": "fblgit",
- "inference_platform": "unknown",
- "id": "fblgit/UNA-ThePitbull-21.4B-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3790387283518841
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.635038821016254
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1216012084592145
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3921666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3515625
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 21.421
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_cybertron-v4-qw7B-MGS/60ac5509-346d-4717-a729-0413fce4b203.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_cybertron-v4-qw7B-MGS/60ac5509-346d-4717-a729-0413fce4b203.json
deleted file mode 100644
index bdc0183722655a1ebf52d66109714cb9f3f38a56..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_cybertron-v4-qw7B-MGS/60ac5509-346d-4717-a729-0413fce4b203.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/fblgit_cybertron-v4-qw7B-MGS/1762652580.15205",
- "retrieved_timestamp": "1762652580.152051",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "fblgit/cybertron-v4-qw7B-MGS",
- "developer": "fblgit",
- "inference_platform": "unknown",
- "id": "fblgit/cybertron-v4-qw7B-MGS"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6263846593704703
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5591772533435835
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34894259818731116
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43709375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44730718085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_cybertron-v4-qw7B-UNAMGS/8c73c2a6-b2e9-419d-8c00-8a983790ba9b.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_cybertron-v4-qw7B-UNAMGS/8c73c2a6-b2e9-419d-8c00-8a983790ba9b.json
deleted file mode 100644
index 9f135528e0d4ce80c4f4a8aede6304f24caa114b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_cybertron-v4-qw7B-UNAMGS/8c73c2a6-b2e9-419d-8c00-8a983790ba9b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/fblgit_cybertron-v4-qw7B-UNAMGS/1762652580.1522481",
- "retrieved_timestamp": "1762652580.152249",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "fblgit/cybertron-v4-qw7B-UNAMGS",
- "developer": "fblgit",
- "inference_platform": "unknown",
- "id": "fblgit/cybertron-v4-qw7B-UNAMGS"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6090240561709597
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5642509108139038
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3731117824773414
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4343333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4500498670212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_juanako-7b-UNA/f61e534a-06b4-4558-8ee6-227ad1e97699.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_juanako-7b-UNA/f61e534a-06b4-4558-8ee6-227ad1e97699.json
deleted file mode 100644
index 306940a73a5ea4deaa749ecbb0c5363d304178b6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_juanako-7b-UNA/f61e534a-06b4-4558-8ee6-227ad1e97699.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/fblgit_juanako-7b-UNA/1762652580.1524491",
- "retrieved_timestamp": "1762652580.15245",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "fblgit/juanako-7b-UNA",
- "developer": "fblgit",
- "inference_platform": "unknown",
- "id": "fblgit/juanako-7b-UNA"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4837276204914073
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.507001145736535
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.033987915407854986
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46449999999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.277094414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO/a1d14150-3b2e-489f-8d18-8894862e9ab0.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO/a1d14150-3b2e-489f-8d18-8894862e9ab0.json
deleted file mode 100644
index 039893179fe04bad02ff2d901afca4fc80550eda..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO/a1d14150-3b2e-489f-8d18-8894862e9ab0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/fblgit_miniclaus-qw1.5B-UNAMGS-GRPO/1762652580.153163",
- "retrieved_timestamp": "1762652580.1531641",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "fblgit/miniclaus-qw1.5B-UNAMGS-GRPO",
- "developer": "fblgit",
- "inference_platform": "unknown",
- "id": "fblgit/miniclaus-qw1.5B-UNAMGS-GRPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3518364605912313
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.423443453814005
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11027190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42543749999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2945478723404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_miniclaus-qw1.5B-UNAMGS/4b337805-4bd3-4106-bcde-adb7a6fbec23.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_miniclaus-qw1.5B-UNAMGS/4b337805-4bd3-4106-bcde-adb7a6fbec23.json
deleted file mode 100644
index 5a98081f1d3fc9a6678f077b5a4eb05a5410731f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_miniclaus-qw1.5B-UNAMGS/4b337805-4bd3-4106-bcde-adb7a6fbec23.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/fblgit_miniclaus-qw1.5B-UNAMGS/1762652580.152649",
- "retrieved_timestamp": "1762652580.152649",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "fblgit/miniclaus-qw1.5B-UNAMGS",
- "developer": "fblgit",
- "inference_platform": "unknown",
- "id": "fblgit/miniclaus-qw1.5B-UNAMGS"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3348005514257725
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4238588294007628
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10876132930513595
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42934374999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2937167553191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_pancho-v1-qw25-3B-UNAMGS/701cb3af-8916-47ab-b118-1cd778a23e66.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_pancho-v1-qw25-3B-UNAMGS/701cb3af-8916-47ab-b118-1cd778a23e66.json
deleted file mode 100644
index 6bb5c79b98297acff5316d5ae4bc4116f69ea7d8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_pancho-v1-qw25-3B-UNAMGS/701cb3af-8916-47ab-b118-1cd778a23e66.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/fblgit_pancho-v1-qw25-3B-UNAMGS/1762652580.153452",
- "retrieved_timestamp": "1762652580.153453",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "fblgit/pancho-v1-qw25-3B-UNAMGS",
- "developer": "fblgit",
- "inference_platform": "unknown",
- "id": "fblgit/pancho-v1-qw25-3B-UNAMGS"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.536134124123991
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49258278193390775
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15709969788519637
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4027395833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3765791223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.397
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_una-cybertron-7b-v2-bf16/8fc3e145-958b-4f25-bfab-4364bcdfeeb1.json b/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_una-cybertron-7b-v2-bf16/8fc3e145-958b-4f25-bfab-4364bcdfeeb1.json
deleted file mode 100644
index bbb435f644dfb73b77d3356e364b6c78091bb07c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/fblgit/fblgit_una-cybertron-7b-v2-bf16/8fc3e145-958b-4f25-bfab-4364bcdfeeb1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/fblgit_una-cybertron-7b-v2-bf16/1762652580.153698",
- "retrieved_timestamp": "1762652580.1536992",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "fblgit/una-cybertron-7b-v2-bf16",
- "developer": "fblgit",
- "inference_platform": "unknown",
- "id": "fblgit/una-cybertron-7b-v2-bf16"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47371086494944525
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3973388920486269
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04078549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4473229166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2442652925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/fhai50032/fhai50032_RolePlayLake-7B/af85e87f-1308-4968-850a-27382f36a63a.json b/leaderboard_data/HFOpenLLMv2/fhai50032/fhai50032_RolePlayLake-7B/af85e87f-1308-4968-850a-27382f36a63a.json
deleted file mode 100644
index 81a70a26423f46b1f8511285dd6622f05cf983b5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/fhai50032/fhai50032_RolePlayLake-7B/af85e87f-1308-4968-850a-27382f36a63a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/fhai50032_RolePlayLake-7B/1762652580.153994",
- "retrieved_timestamp": "1762652580.153995",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "fhai50032/RolePlayLake-7B",
- "developer": "fhai50032",
- "inference_platform": "unknown",
- "id": "fhai50032/RolePlayLake-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5056594280952318
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5252170095233862
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07250755287009064
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4459270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3159906914893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/flammenai/flammenai_flammen15-gutenberg-DPO-v1-7B/1244b8d9-e832-4f2b-8ae5-52449f6ac38c.json b/leaderboard_data/HFOpenLLMv2/flammenai/flammenai_flammen15-gutenberg-DPO-v1-7B/1244b8d9-e832-4f2b-8ae5-52449f6ac38c.json
deleted file mode 100644
index a2aabfdf843011d665079dd64c3dd1bcc0f19897..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/flammenai/flammenai_flammen15-gutenberg-DPO-v1-7B/1244b8d9-e832-4f2b-8ae5-52449f6ac38c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/flammenai_flammen15-gutenberg-DPO-v1-7B/1762652580.155953",
- "retrieved_timestamp": "1762652580.155954",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "flammenai/flammen15-gutenberg-DPO-v1-7B",
- "developer": "flammenai",
- "inference_platform": "unknown",
- "id": "flammenai/flammen15-gutenberg-DPO-v1-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47980580415519714
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5202983979716951
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07628398791540786
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4293125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3185671542553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/fluently-lm/fluently-lm_FluentlyLM-Prinum/950d2518-7245-4ed4-9b16-91f944aa8f15.json b/leaderboard_data/HFOpenLLMv2/fluently-lm/fluently-lm_FluentlyLM-Prinum/950d2518-7245-4ed4-9b16-91f944aa8f15.json
deleted file mode 100644
index f2e6cac4f8306d59d4cbf40354045bebef1598ed..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/fluently-lm/fluently-lm_FluentlyLM-Prinum/950d2518-7245-4ed4-9b16-91f944aa8f15.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/fluently-lm_FluentlyLM-Prinum/1762652580.156252",
- "retrieved_timestamp": "1762652580.1562529",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "fluently-lm/FluentlyLM-Prinum",
- "developer": "fluently-lm",
- "inference_platform": "unknown",
- "id": "fluently-lm/FluentlyLM-Prinum"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.809033364805383
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7143813967889198
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5400302114803626
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38674496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44714583333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5807845744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/fluently-lm/fluently-lm_Llama-TI-8B-Instruct/47960f3f-b39c-4641-8a94-fb70f9a6a53f.json b/leaderboard_data/HFOpenLLMv2/fluently-lm/fluently-lm_Llama-TI-8B-Instruct/47960f3f-b39c-4641-8a94-fb70f9a6a53f.json
deleted file mode 100644
index 64e676ca9865a66964f5eb48de69e2a7d828a10e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/fluently-lm/fluently-lm_Llama-TI-8B-Instruct/47960f3f-b39c-4641-8a94-fb70f9a6a53f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/fluently-lm_Llama-TI-8B-Instruct/1762652580.156872",
- "retrieved_timestamp": "1762652580.156876",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "fluently-lm/Llama-TI-8B-Instruct",
- "developer": "fluently-lm",
- "inference_platform": "unknown",
- "id": "fluently-lm/Llama-TI-8B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7716392505219485
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5252143041749421
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23036253776435045
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38134375000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37258976063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/fluently-sets/fluently-sets_FalconThink3-10B-IT/9329922e-7594-497d-bfab-9c8a18300dc9.json b/leaderboard_data/HFOpenLLMv2/fluently-sets/fluently-sets_FalconThink3-10B-IT/9329922e-7594-497d-bfab-9c8a18300dc9.json
deleted file mode 100644
index 24d16ee91df977530a22edd31c9cb8ca147e8c5c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/fluently-sets/fluently-sets_FalconThink3-10B-IT/9329922e-7594-497d-bfab-9c8a18300dc9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/fluently-sets_FalconThink3-10B-IT/1762652580.1573172",
- "retrieved_timestamp": "1762652580.1573179",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "fluently-sets/FalconThink3-10B-IT",
- "developer": "fluently-sets",
- "inference_platform": "unknown",
- "id": "fluently-sets/FalconThink3-10B-IT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7326216660682544
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.620016981648187
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24471299093655588
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3347315436241611
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44788541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4434840425531915
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.306
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/fluently-sets/fluently-sets_reasoning-1-1k-demo/c63fc7e4-87ae-4516-ad3d-df95693133d5.json b/leaderboard_data/HFOpenLLMv2/fluently-sets/fluently-sets_reasoning-1-1k-demo/c63fc7e4-87ae-4516-ad3d-df95693133d5.json
deleted file mode 100644
index 7b495e56a8de7501135bc69b03c1d3b5ebc6eac8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/fluently-sets/fluently-sets_reasoning-1-1k-demo/c63fc7e4-87ae-4516-ad3d-df95693133d5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/fluently-sets_reasoning-1-1k-demo/1762652580.157624",
- "retrieved_timestamp": "1762652580.1576252",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "fluently-sets/reasoning-1-1k-demo",
- "developer": "fluently-sets",
- "inference_platform": "unknown",
- "id": "fluently-sets/reasoning-1-1k-demo"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7524800861713586
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6396692351083745
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4282477341389728
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33557046979865773
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4060625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4773936170212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/936751f5-4483-4986-9a8c-cb002feb8858.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/936751f5-4483-4986-9a8c-cb002feb8858.json
deleted file mode 100644
index cb70c885d84f4345d09675532e97a11897b261f8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/936751f5-4483-4986-9a8c-cb002feb8858.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/formulae_mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp/1762652580.1578538",
- "retrieved_timestamp": "1762652580.157855",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp",
- "developer": "formulae",
- "inference_platform": "unknown",
- "id": "formulae/mita-elite-sce-gen1.1-v1-7b-2-26-2025-exp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16139288199754429
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29763925404210967
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0015105740181268882
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4219375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11735372340425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-v1.1-7b-2-25-2025/7352f47c-8b57-477f-8190-b08b5b23dfb5.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-v1.1-7b-2-25-2025/7352f47c-8b57-477f-8190-b08b5b23dfb5.json
deleted file mode 100644
index 9be7f11d84c45342d57ec5193f83092fd5304ddc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-v1.1-7b-2-25-2025/7352f47c-8b57-477f-8190-b08b5b23dfb5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/formulae_mita-elite-v1.1-7b-2-25-2025/1762652580.158112",
- "retrieved_timestamp": "1762652580.158113",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "formulae/mita-elite-v1.1-7b-2-25-2025",
- "developer": "formulae",
- "inference_platform": "unknown",
- "id": "formulae/mita-elite-v1.1-7b-2-25-2025"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1249728498162653
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28673660666639783
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2483221476510067
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3487291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10979055851063829
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-v1.1-gen2-7b-2-25-2025/106c33d2-84fb-4ea3-b2d3-78981834fdb0.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-v1.1-gen2-7b-2-25-2025/106c33d2-84fb-4ea3-b2d3-78981834fdb0.json
deleted file mode 100644
index faf317995d58f4c085058f8113c848589c9f4112..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-v1.1-gen2-7b-2-25-2025/106c33d2-84fb-4ea3-b2d3-78981834fdb0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/formulae_mita-elite-v1.1-gen2-7b-2-25-2025/1762652580.158336",
- "retrieved_timestamp": "1762652580.158336",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "formulae/mita-elite-v1.1-gen2-7b-2-25-2025",
- "developer": "formulae",
- "inference_platform": "unknown",
- "id": "formulae/mita-elite-v1.1-gen2-7b-2-25-2025"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14108454456397912
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.292375183445424
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2525167785234899
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35409375000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11012300531914894
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-v1.2-7b-2-26-2025/761560dc-3a0b-481f-8ec2-4d1ea97cfa6f.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-v1.2-7b-2-26-2025/761560dc-3a0b-481f-8ec2-4d1ea97cfa6f.json
deleted file mode 100644
index aa0ee908553c481278a8046e97ed47e7b096cdfd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-elite-v1.2-7b-2-26-2025/761560dc-3a0b-481f-8ec2-4d1ea97cfa6f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/formulae_mita-elite-v1.2-7b-2-26-2025/1762652580.158752",
- "retrieved_timestamp": "1762652580.158756",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "formulae/mita-elite-v1.2-7b-2-26-2025",
- "developer": "formulae",
- "inference_platform": "unknown",
- "id": "formulae/mita-elite-v1.2-7b-2-26-2025"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14800396281865452
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29300480737441686
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0022658610271903325
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4286666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1186003989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-gen3-7b-2-26-2025/0aa40e02-762d-4a80-932f-f967057c4f50.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-gen3-7b-2-26-2025/0aa40e02-762d-4a80-932f-f967057c4f50.json
deleted file mode 100644
index c1e925df3828abac2a78d51d55103e067c821bb4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-gen3-7b-2-26-2025/0aa40e02-762d-4a80-932f-f967057c4f50.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/formulae_mita-gen3-7b-2-26-2025/1762652580.159164",
- "retrieved_timestamp": "1762652580.159165",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "formulae/mita-gen3-7b-2-26-2025",
- "developer": "formulae",
- "inference_platform": "unknown",
- "id": "formulae/mita-gen3-7b-2-26-2025"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1964144026737944
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2915705776174771
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0022658610271903325
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3912083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11236702127659574
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-gen3-v1.2-7b-2-26-2025/a28f8779-d2df-4371-b946-472b335f3ca3.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-gen3-v1.2-7b-2-26-2025/a28f8779-d2df-4371-b946-472b335f3ca3.json
deleted file mode 100644
index 5b4f474601a3ac1fc7e4377f86abf530d48c3b46..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-gen3-v1.2-7b-2-26-2025/a28f8779-d2df-4371-b946-472b335f3ca3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/formulae_mita-gen3-v1.2-7b-2-26-2025/1762652580.15945",
- "retrieved_timestamp": "1762652580.1594508",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "formulae/mita-gen3-v1.2-7b-2-26-2025",
- "developer": "formulae",
- "inference_platform": "unknown",
- "id": "formulae/mita-gen3-v1.2-7b-2-26-2025"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2043577707150361
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30577476935056
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0022658610271903325
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38999999999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11278257978723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-math-v2.3-2-25-2025/fa005333-c7b5-4494-a8cb-4edb1f7d00b9.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-math-v2.3-2-25-2025/fa005333-c7b5-4494-a8cb-4edb1f7d00b9.json
deleted file mode 100644
index 23da59f95a3b4b4afbec847acf46f3c6dc0a3d96..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-math-v2.3-2-25-2025/fa005333-c7b5-4494-a8cb-4edb1f7d00b9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/formulae_mita-math-v2.3-2-25-2025/1762652580.159737",
- "retrieved_timestamp": "1762652580.159738",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "formulae/mita-math-v2.3-2-25-2025",
- "developer": "formulae",
- "inference_platform": "unknown",
- "id": "formulae/mita-math-v2.3-2-25-2025"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13733781920858879
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2949403673764691
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25083892617449666
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36975
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11178523936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-v1-7b/9c629542-6fd0-4cd1-90c7-7f1e95a7a25e.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-v1-7b/9c629542-6fd0-4cd1-90c7-7f1e95a7a25e.json
deleted file mode 100644
index 432a63d0695dcb1e5a21a650929df2a328eba43a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-v1-7b/9c629542-6fd0-4cd1-90c7-7f1e95a7a25e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/formulae_mita-v1-7b/1762652580.160087",
- "retrieved_timestamp": "1762652580.160088",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "formulae/mita-v1-7b",
- "developer": "formulae",
- "inference_platform": "unknown",
- "id": "formulae/mita-v1-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19723888172271792
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3003216459152819
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.002265861027190332
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41520833333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1146941489361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-v1.1-7b-2-24-2025/332cbdd8-96b7-40d5-87c6-3610dcbcdc54.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-v1.1-7b-2-24-2025/332cbdd8-96b7-40d5-87c6-3610dcbcdc54.json
deleted file mode 100644
index 12145041c2ffa3b810100677d53a2edbf071589e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-v1.1-7b-2-24-2025/332cbdd8-96b7-40d5-87c6-3610dcbcdc54.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/formulae_mita-v1.1-7b-2-24-2025/1762652580.1604211",
- "retrieved_timestamp": "1762652580.1604218",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "formulae/mita-v1.1-7b-2-24-2025",
- "developer": "formulae",
- "inference_platform": "unknown",
- "id": "formulae/mita-v1.1-7b-2-24-2025"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34122018466557624
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5442430910797442
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4350453172205438
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145973154362416
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45569791666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4523769946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-v1.2-7b-2-24-2025/a07149d4-66e5-4a0d-b4ae-b696027e821c.json b/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-v1.2-7b-2-24-2025/a07149d4-66e5-4a0d-b4ae-b696027e821c.json
deleted file mode 100644
index 0d4d97207b3659cb93603ca33fbfa61ac4c7d0ca..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/formulae/formulae_mita-v1.2-7b-2-24-2025/a07149d4-66e5-4a0d-b4ae-b696027e821c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/formulae_mita-v1.2-7b-2-24-2025/1762652580.160727",
- "retrieved_timestamp": "1762652580.160728",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "formulae/mita-v1.2-7b-2-24-2025",
- "developer": "formulae",
- "inference_platform": "unknown",
- "id": "formulae/mita-v1.2-7b-2-24-2025"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.256415200556745
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4919464940215105
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4879154078549849
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4343958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33585438829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/frameai/frameai_Loxa-4B/b8ac82ef-a231-43ee-aaf2-23b0830cfbc3.json b/leaderboard_data/HFOpenLLMv2/frameai/frameai_Loxa-4B/b8ac82ef-a231-43ee-aaf2-23b0830cfbc3.json
deleted file mode 100644
index 7306c5d6242be94297a809614c368599df0adea8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/frameai/frameai_Loxa-4B/b8ac82ef-a231-43ee-aaf2-23b0830cfbc3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/frameai_Loxa-4B/1762652580.160984",
- "retrieved_timestamp": "1762652580.160984",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "frameai/Loxa-4B",
- "developer": "frameai",
- "inference_platform": "unknown",
- "id": "frameai/Loxa-4B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47648350820268
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42171373309002896
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1095166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33765625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28016954787234044
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 4.018
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/freewheelin/freewheelin_free-solar-evo-v0.1/c2438204-5b2b-41ce-aa95-27afad6f61a9.json b/leaderboard_data/HFOpenLLMv2/freewheelin/freewheelin_free-solar-evo-v0.1/c2438204-5b2b-41ce-aa95-27afad6f61a9.json
deleted file mode 100644
index bc627056b134fe3b59b8233b50964816573e1c39..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/freewheelin/freewheelin_free-solar-evo-v0.1/c2438204-5b2b-41ce-aa95-27afad6f61a9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/freewheelin_free-solar-evo-v0.1/1762652580.16175",
- "retrieved_timestamp": "1762652580.161752",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "freewheelin/free-solar-evo-v0.1",
- "developer": "freewheelin",
- "inference_platform": "unknown",
- "id": "freewheelin/free-solar-evo-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20500715878313985
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4502211109638701
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.008308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4945833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3414228723404255
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.732
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/freewheelin/freewheelin_free-solar-evo-v0.11/d2180e09-02da-48d2-adf6-1710299b272e.json b/leaderboard_data/HFOpenLLMv2/freewheelin/freewheelin_free-solar-evo-v0.11/d2180e09-02da-48d2-adf6-1710299b272e.json
deleted file mode 100644
index 98c0bb3750e3394ff6715838ec9a2673d2a3d97c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/freewheelin/freewheelin_free-solar-evo-v0.11/d2180e09-02da-48d2-adf6-1710299b272e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/freewheelin_free-solar-evo-v0.11/1762652580.1621969",
- "retrieved_timestamp": "1762652580.162198",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "freewheelin/free-solar-evo-v0.11",
- "developer": "freewheelin",
- "inference_platform": "unknown",
- "id": "freewheelin/free-solar-evo-v0.11"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20265894493277836
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4545155032474882
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.008308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5052187499999999
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34674202127659576
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.732
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/freewheelin/freewheelin_free-solar-evo-v0.13/6f6887bf-961c-4b6b-a285-a78459a46488.json b/leaderboard_data/HFOpenLLMv2/freewheelin/freewheelin_free-solar-evo-v0.13/6f6887bf-961c-4b6b-a285-a78459a46488.json
deleted file mode 100644
index f85188ccca6d6a47879420d982b3860e15400daa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/freewheelin/freewheelin_free-solar-evo-v0.13/6f6887bf-961c-4b6b-a285-a78459a46488.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/freewheelin_free-solar-evo-v0.13/1762652580.1624699",
- "retrieved_timestamp": "1762652580.1624708",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "freewheelin/free-solar-evo-v0.13",
- "developer": "freewheelin",
- "inference_platform": "unknown",
- "id": "freewheelin/free-solar-evo-v0.13"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2320598234905606
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4554839670962904
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.012084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28859060402684567
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.50515625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34699135638297873
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.732
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/gabrielmbmb/gabrielmbmb_SmolLM-1.7B-Instruct-IFEval/6e3decae-f2a9-4f71-9511-76d28a675cc2.json b/leaderboard_data/HFOpenLLMv2/gabrielmbmb/gabrielmbmb_SmolLM-1.7B-Instruct-IFEval/6e3decae-f2a9-4f71-9511-76d28a675cc2.json
deleted file mode 100644
index 954c27d98be1ace663c8f2e983ceb21ab32e6ccd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/gabrielmbmb/gabrielmbmb_SmolLM-1.7B-Instruct-IFEval/6e3decae-f2a9-4f71-9511-76d28a675cc2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/gabrielmbmb_SmolLM-1.7B-Instruct-IFEval/1762652580.162997",
- "retrieved_timestamp": "1762652580.162998",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "gabrielmbmb/SmolLM-1.7B-Instruct-IFEval",
- "developer": "gabrielmbmb",
- "inference_platform": "unknown",
- "id": "gabrielmbmb/SmolLM-1.7B-Instruct-IFEval"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23058595637353335
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313843378282092
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.010574018126888218
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33276041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11560837765957446
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 1.711
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/gaverfraxz/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/3666aa17-279d-4f0b-a6c2-2c8198729df9.json b/leaderboard_data/HFOpenLLMv2/gaverfraxz/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/3666aa17-279d-4f0b-a6c2-2c8198729df9.json
deleted file mode 100644
index 676f5a94b2ca3c8c9c48ff4ee5239a5787a184b2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/gaverfraxz/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/3666aa17-279d-4f0b-a6c2-2c8198729df9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA/1762652580.163272",
- "retrieved_timestamp": "1762652580.1632729",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA",
- "developer": "gaverfraxz",
- "inference_platform": "unknown",
- "id": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-DELLA"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40094615619888563
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3984844272016949
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.019637462235649546
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36504166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16539228723404256
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/gaverfraxz/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/83a638be-6f3d-4d5b-b1de-6515634aebbd.json b/leaderboard_data/HFOpenLLMv2/gaverfraxz/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/83a638be-6f3d-4d5b-b1de-6515634aebbd.json
deleted file mode 100644
index 78e4f39ff54c017fcab1b6ca0eca84b9bf41c146..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/gaverfraxz/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/83a638be-6f3d-4d5b-b1de-6515634aebbd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/gaverfraxz_Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES/1762652580.163549",
- "retrieved_timestamp": "1762652580.16355",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES",
- "developer": "gaverfraxz",
- "inference_platform": "unknown",
- "id": "gaverfraxz/Meta-Llama-3.1-8B-Instruct-HalfAbliterated-TIES"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45505148561372716
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5043660783243713
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1299093655589124
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36785239361702127
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ghost-x/ghost-x_ghost-8b-beta-1608/b5fba89f-ec8f-4e71-ad19-32c7d85698fb.json b/leaderboard_data/HFOpenLLMv2/ghost-x/ghost-x_ghost-8b-beta-1608/b5fba89f-ec8f-4e71-ad19-32c7d85698fb.json
deleted file mode 100644
index 9e69add894b2d5a46ca1f8a813f9b63e126447d4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ghost-x/ghost-x_ghost-8b-beta-1608/b5fba89f-ec8f-4e71-ad19-32c7d85698fb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ghost-x_ghost-8b-beta-1608/1762652580.16434",
- "retrieved_timestamp": "1762652580.164341",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ghost-x/ghost-8b-beta-1608",
- "developer": "ghost-x",
- "inference_platform": "unknown",
- "id": "ghost-x/ghost-8b-beta-1608"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42727407722620425
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45165496100352914
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06948640483383686
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35158333333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2839926861702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_SahabatAI-MediChatIndo-8B-v1/61543864-320f-41ef-889d-7c0e95a229bd.json b/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_SahabatAI-MediChatIndo-8B-v1/61543864-320f-41ef-889d-7c0e95a229bd.json
deleted file mode 100644
index e65ff7a567eaae5a2222d2d938c69d93bff87914..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_SahabatAI-MediChatIndo-8B-v1/61543864-320f-41ef-889d-7c0e95a229bd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/gmonsoon_SahabatAI-MediChatIndo-8B-v1/1762652580.165248",
- "retrieved_timestamp": "1762652580.165249",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "gmonsoon/SahabatAI-MediChatIndo-8B-v1",
- "developer": "gmonsoon",
- "inference_platform": "unknown",
- "id": "gmonsoon/SahabatAI-MediChatIndo-8B-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41628323958208663
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4508834027881236
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.061933534743202415
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3753958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3107546542553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_SahabatAI-Rebase-8B-Test/a7daa424-7b22-4320-bddd-be350d54b08d.json b/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_SahabatAI-Rebase-8B-Test/a7daa424-7b22-4320-bddd-be350d54b08d.json
deleted file mode 100644
index 09df6239d20e320ec32117322f0956630124212f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_SahabatAI-Rebase-8B-Test/a7daa424-7b22-4320-bddd-be350d54b08d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/gmonsoon_SahabatAI-Rebase-8B-Test/1762652580.165493",
- "retrieved_timestamp": "1762652580.165493",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "gmonsoon/SahabatAI-Rebase-8B-Test",
- "developer": "gmonsoon",
- "inference_platform": "unknown",
- "id": "gmonsoon/SahabatAI-Rebase-8B-Test"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5156263159527831
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.522960549734047
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1148036253776435
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41328125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3663563829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_StockSeaLLMs-7B-v1/ac53d663-0e5c-4a7e-8d9d-efcd70d39b10.json b/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_StockSeaLLMs-7B-v1/ac53d663-0e5c-4a7e-8d9d-efcd70d39b10.json
deleted file mode 100644
index 5bc2507c2a056317329069b0b79668b347c612bf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_StockSeaLLMs-7B-v1/ac53d663-0e5c-4a7e-8d9d-efcd70d39b10.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/gmonsoon_StockSeaLLMs-7B-v1/1762652580.165695",
- "retrieved_timestamp": "1762652580.165696",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "gmonsoon/StockSeaLLMs-7B-v1",
- "developer": "gmonsoon",
- "inference_platform": "unknown",
- "id": "gmonsoon/StockSeaLLMs-7B-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4599218961245052
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5271087932535433
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19637462235649547
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.421375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39519614361702127
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_gemma2-9b-sahabatai-v1-instruct-BaseTIES/6d500e75-5605-4268-88a1-dc4abc7c5a7f.json b/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_gemma2-9b-sahabatai-v1-instruct-BaseTIES/6d500e75-5605-4268-88a1-dc4abc7c5a7f.json
deleted file mode 100644
index e80d81d20eff3724f3bd2d7dc821162aa35cdaad..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/gmonsoon/gmonsoon_gemma2-9b-sahabatai-v1-instruct-BaseTIES/6d500e75-5605-4268-88a1-dc4abc7c5a7f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/gmonsoon_gemma2-9b-sahabatai-v1-instruct-BaseTIES/1762652580.165903",
- "retrieved_timestamp": "1762652580.1659038",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES",
- "developer": "gmonsoon",
- "inference_platform": "unknown",
- "id": "gmonsoon/gemma2-9b-sahabatai-v1-instruct-BaseTIES"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7377923908562614
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6077244532441547
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19939577039274925
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47780208333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43467420212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_full_2/3c550631-c27c-4743-98f3-3ab65c5fa906.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_full_2/3c550631-c27c-4743-98f3-3ab65c5fa906.json
deleted file mode 100644
index 0f3614ebc66f8afa1b3b8c39ca14b919831bf7f8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_full_2/3c550631-c27c-4743-98f3-3ab65c5fa906.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_full_2/1762652580.166118",
- "retrieved_timestamp": "1762652580.166118",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/alpaca_data_full_2",
- "developer": "godlikehhd",
- "inference_platform": "unknown",
- "id": "godlikehhd/alpaca_data_full_2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31781450994472443
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4216953430035033
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09290030211480363
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40515625000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.285405585106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_full_3B/d7d6baf0-00d3-4960-970c-949bb9919ac9.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_full_3B/d7d6baf0-00d3-4960-970c-949bb9919ac9.json
deleted file mode 100644
index 4d89ce685ccf42397421c5d7c11ff2ce005c3658..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_full_3B/d7d6baf0-00d3-4960-970c-949bb9919ac9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_full_3B/1762652580.166356",
- "retrieved_timestamp": "1762652580.166357",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/alpaca_data_full_3B",
- "developer": "godlikehhd",
- "inference_platform": "unknown",
- "id": "godlikehhd/alpaca_data_full_3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36957162550920447
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46841893776834337
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1336858006042296
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4954791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.335688164893617
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_max_2600/017ca821-f6ea-43bc-bac1-28dd30c2341d.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_max_2600/017ca821-f6ea-43bc-bac1-28dd30c2341d.json
deleted file mode 100644
index 82eaff22a493b312d11a767b7e17c4b62f27de55..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_max_2600/017ca821-f6ea-43bc-bac1-28dd30c2341d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_max_2600/1762652580.16661",
- "retrieved_timestamp": "1762652580.166613",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/alpaca_data_ifd_max_2600",
- "developer": "godlikehhd",
- "inference_platform": "unknown",
- "id": "godlikehhd/alpaca_data_ifd_max_2600"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3042504997850149
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40285133876405865
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09894259818731117
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3508645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29163896276595747
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_max_2600_3B/41d72b83-3c55-460f-9d21-88866eed6b9a.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_max_2600_3B/41d72b83-3c55-460f-9d21-88866eed6b9a.json
deleted file mode 100644
index 67d4f12fea3f02156ac8b32de005fb7e13dabef3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_max_2600_3B/41d72b83-3c55-460f-9d21-88866eed6b9a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_max_2600_3B/1762652580.1669528",
- "retrieved_timestamp": "1762652580.166954",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/alpaca_data_ifd_max_2600_3B",
- "developer": "godlikehhd",
- "inference_platform": "unknown",
- "id": "godlikehhd/alpaca_data_ifd_max_2600_3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.298155560579263
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4626377955326701
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1593655589123867
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43455208333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32878989361702127
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_me_max_5200/e2f13357-053c-42e5-8149-465b4f16d334.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_me_max_5200/e2f13357-053c-42e5-8149-465b4f16d334.json
deleted file mode 100644
index ef312cdf869e24d2b19b65af2f61eaff12a00311..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_me_max_5200/e2f13357-053c-42e5-8149-465b4f16d334.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_me_max_5200/1762652580.167201",
- "retrieved_timestamp": "1762652580.167202",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/alpaca_data_ifd_me_max_5200",
- "developer": "godlikehhd",
- "inference_platform": "unknown",
- "id": "godlikehhd/alpaca_data_ifd_me_max_5200"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36832271705740766
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4153453015610935
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09743202416918428
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3482604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29820478723404253
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_min_2600/5561b7bd-bd90-445c-b969-8d400e99e629.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_min_2600/5561b7bd-bd90-445c-b969-8d400e99e629.json
deleted file mode 100644
index 112fd0348d6e86c69c439f1bb3652d3527d082d6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ifd_min_2600/5561b7bd-bd90-445c-b969-8d400e99e629.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ifd_min_2600/1762652580.167441",
- "retrieved_timestamp": "1762652580.167443",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/alpaca_data_ifd_min_2600",
- "developer": "godlikehhd",
- "inference_platform": "unknown",
- "id": "godlikehhd/alpaca_data_ifd_min_2600"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3749673089624419
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4219047173013076
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09667673716012085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36562500000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.289311835106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_ans_max_5200/9c2cee8b-3f35-4a49-814e-ad316fcede7f.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_ans_max_5200/9c2cee8b-3f35-4a49-814e-ad316fcede7f.json
deleted file mode 100644
index fcc43fe134c3587eaa3393e1f2a8fe78e44baf45..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_ans_max_5200/9c2cee8b-3f35-4a49-814e-ad316fcede7f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_ans_max_5200/1762652580.167691",
- "retrieved_timestamp": "1762652580.1676931",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/alpaca_data_ins_ans_max_5200",
- "developer": "godlikehhd",
- "inference_platform": "unknown",
- "id": "godlikehhd/alpaca_data_ins_ans_max_5200"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34786477657061043
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40982060224148426
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1027190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3601666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2900598404255319
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_max_5200/cdd1de41-4e85-4872-be9f-e3af4e9221a9.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_max_5200/cdd1de41-4e85-4872-be9f-e3af4e9221a9.json
deleted file mode 100644
index 5f09f888b5aff912800153a18729a6d3fbca7193..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_max_5200/cdd1de41-4e85-4872-be9f-e3af4e9221a9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_max_5200/1762652580.1679769",
- "retrieved_timestamp": "1762652580.167978",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/alpaca_data_ins_max_5200",
- "developer": "godlikehhd",
- "inference_platform": "unknown",
- "id": "godlikehhd/alpaca_data_ins_max_5200"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32750657145263457
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41550742328078477
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09969788519637462
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.361375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2915558510638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_min_2600/121f28df-65d6-4a48-aa77-4ee794034032.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_min_2600/121f28df-65d6-4a48-aa77-4ee794034032.json
deleted file mode 100644
index acaab98030218f910a63e613099efb061ba341ca..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_min_2600/121f28df-65d6-4a48-aa77-4ee794034032.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_min_2600/1762652580.1682088",
- "retrieved_timestamp": "1762652580.16821",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/alpaca_data_ins_min_2600",
- "developer": "godlikehhd",
- "inference_platform": "unknown",
- "id": "godlikehhd/alpaca_data_ins_min_2600"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33300199027469335
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41873469888886056
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11102719033232629
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38534375000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28798204787234044
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_min_5200/d976888b-5e17-4e5c-b557-0b48bf36d4f7.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_min_5200/d976888b-5e17-4e5c-b557-0b48bf36d4f7.json
deleted file mode 100644
index 0be2526ebd6a8bc2dbaa324c40d58e9c81650225..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_ins_min_5200/d976888b-5e17-4e5c-b557-0b48bf36d4f7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_ins_min_5200/1762652580.1684108",
- "retrieved_timestamp": "1762652580.1684108",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/alpaca_data_ins_min_5200",
- "developer": "godlikehhd",
- "inference_platform": "unknown",
- "id": "godlikehhd/alpaca_data_ins_min_5200"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3359995921931586
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4289279419241076
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10347432024169184
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39055208333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29488031914893614
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_sampled_ifd_5200/e7ca66f4-852b-4b5b-8781-d6272a43c559.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_sampled_ifd_5200/e7ca66f4-852b-4b5b-8781-d6272a43c559.json
deleted file mode 100644
index c106b86217e361ef245c888c6533c1db8928bdc2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_sampled_ifd_5200/e7ca66f4-852b-4b5b-8781-d6272a43c559.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_sampled_ifd_5200/1762652580.1686149",
- "retrieved_timestamp": "1762652580.1686149",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/alpaca_data_sampled_ifd_5200",
- "developer": "godlikehhd",
- "inference_platform": "unknown",
- "id": "godlikehhd/alpaca_data_sampled_ifd_5200"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2923853154075631
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4032969715626326
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12537764350453173
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3520729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2896442819148936
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_sampled_ifd_new_5200/906db90c-7ea4-4878-aa01-06fd1ad0d18a.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_sampled_ifd_new_5200/906db90c-7ea4-4878-aa01-06fd1ad0d18a.json
deleted file mode 100644
index f0dec574729b3655f788361c0c969c4a09e01812..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_sampled_ifd_new_5200/906db90c-7ea4-4878-aa01-06fd1ad0d18a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_sampled_ifd_new_5200/1762652580.1688168",
- "retrieved_timestamp": "1762652580.168818",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/alpaca_data_sampled_ifd_new_5200",
- "developer": "godlikehhd",
- "inference_platform": "unknown",
- "id": "godlikehhd/alpaca_data_sampled_ifd_new_5200"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36632468516868577
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4177831234050982
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09441087613293052
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29247007978723405
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_0.1_2600/08195b61-5fe5-4cce-8da4-34b731289278.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_0.1_2600/08195b61-5fe5-4cce-8da4-34b731289278.json
deleted file mode 100644
index d4344835f39491397d211ed3834ebeb56487c382..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_0.1_2600/08195b61-5fe5-4cce-8da4-34b731289278.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_0.1_2600/1762652580.1691651",
- "retrieved_timestamp": "1762652580.169167",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/alpaca_data_score_max_0.1_2600",
- "developer": "godlikehhd",
- "inference_platform": "unknown",
- "id": "godlikehhd/alpaca_data_score_max_0.1_2600"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3287554799044313
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42522607952607777
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09894259818731117
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37064583333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29230385638297873
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_0.3_2600/40e4c93e-7a54-49c2-b513-33edd87f59b0.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_0.3_2600/40e4c93e-7a54-49c2-b513-33edd87f59b0.json
deleted file mode 100644
index d1f4fe128847e0269ed60fda530047e1dfea170d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_0.3_2600/40e4c93e-7a54-49c2-b513-33edd87f59b0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_0.3_2600/1762652580.1694138",
- "retrieved_timestamp": "1762652580.169415",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/alpaca_data_score_max_0.3_2600",
- "developer": "godlikehhd",
- "inference_platform": "unknown",
- "id": "godlikehhd/alpaca_data_score_max_0.3_2600"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33752332699459653
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4151448369012765
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10347432024169184
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37594791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29130651595744683
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_0.7_2600/988c6ec3-e967-4cec-993b-e060a5a18e97.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_0.7_2600/988c6ec3-e967-4cec-993b-e060a5a18e97.json
deleted file mode 100644
index ab82067f5cbcb6138dab5fc9f1157dcfdb0c9336..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_0.7_2600/988c6ec3-e967-4cec-993b-e060a5a18e97.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_0.7_2600/1762652580.169624",
- "retrieved_timestamp": "1762652580.169625",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/alpaca_data_score_max_0.7_2600",
- "developer": "godlikehhd",
- "inference_platform": "unknown",
- "id": "godlikehhd/alpaca_data_score_max_0.7_2600"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3639764713183243
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41845266250678703
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10725075528700906
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3468645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2982878989361702
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_2500/b6fd288d-36d5-4499-bf2d-da1fdd1120c5.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_2500/b6fd288d-36d5-4499-bf2d-da1fdd1120c5.json
deleted file mode 100644
index 40267fce6d8cc4d2a644329218fe277307ac923c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_2500/b6fd288d-36d5-4499-bf2d-da1fdd1120c5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_2500/1762652580.1698968",
- "retrieved_timestamp": "1762652580.169898",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/alpaca_data_score_max_2500",
- "developer": "godlikehhd",
- "inference_platform": "unknown",
- "id": "godlikehhd/alpaca_data_score_max_2500"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3563577973111345
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41801375075895447
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09516616314199396
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36270833333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2939660904255319
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_2600_3B/92dc5ec0-5aea-45f5-9237-32b5a65e095b.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_2600_3B/92dc5ec0-5aea-45f5-9237-32b5a65e095b.json
deleted file mode 100644
index 3a2bc19ad25aa94f72bc1b7afa62bbac565ca639..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_2600_3B/92dc5ec0-5aea-45f5-9237-32b5a65e095b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_2600_3B/1762652580.170121",
- "retrieved_timestamp": "1762652580.170122",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/alpaca_data_score_max_2600_3B",
- "developer": "godlikehhd",
- "inference_platform": "unknown",
- "id": "godlikehhd/alpaca_data_score_max_2600_3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33577463352792813
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4716306839273412
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15483383685800603
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44744791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3341921542553192
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.086
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_5200/d877dbd4-b3da-44b5-974a-1267db396435.json b/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_5200/d877dbd4-b3da-44b5-974a-1267db396435.json
deleted file mode 100644
index b4e353fd2aceab0d77dd190f2e3a0ae9ae206c73..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/godlikehhd/godlikehhd_alpaca_data_score_max_5200/d877dbd4-b3da-44b5-974a-1267db396435.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/godlikehhd_alpaca_data_score_max_5200/1762652580.170327",
- "retrieved_timestamp": "1762652580.170327",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "godlikehhd/alpaca_data_score_max_5200",
- "developer": "godlikehhd",
- "inference_platform": "unknown",
- "id": "godlikehhd/alpaca_data_score_max_5200"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34454248061809334
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42417102847687554
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09743202416918428
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3877916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446476063829785
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/AALF_gemma-2-27b-it-SimPO-37K-100steps/214ebe7f-357a-435c-9bf5-451bdea1ca9a.json b/leaderboard_data/HFOpenLLMv2/google/AALF_gemma-2-27b-it-SimPO-37K-100steps/214ebe7f-357a-435c-9bf5-451bdea1ca9a.json
deleted file mode 100644
index 3d5393da256807f9c43ea1f90b4c8eca30fce1cf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/AALF_gemma-2-27b-it-SimPO-37K-100steps/214ebe7f-357a-435c-9bf5-451bdea1ca9a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AALF_gemma-2-27b-it-SimPO-37K-100steps/1762652579.472713",
- "retrieved_timestamp": "1762652579.472714",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AALF/gemma-2-27b-it-SimPO-37K-100steps",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "AALF/gemma-2-27b-it-SimPO-37K-100steps"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2567642743476199
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39308230769885016
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.021148036253776436
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28859060402684567
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3329166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21251662234042554
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/AALF_gemma-2-27b-it-SimPO-37K/878ec84b-a365-4887-b7fd-1dc738f6eda8.json b/leaderboard_data/HFOpenLLMv2/google/AALF_gemma-2-27b-it-SimPO-37K/878ec84b-a365-4887-b7fd-1dc738f6eda8.json
deleted file mode 100644
index 6b04094170c7adef40bf07ee3ea501e2ccff7b0d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/AALF_gemma-2-27b-it-SimPO-37K/878ec84b-a365-4887-b7fd-1dc738f6eda8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AALF_gemma-2-27b-it-SimPO-37K/1762652579.472391",
- "retrieved_timestamp": "1762652579.4723918",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AALF/gemma-2-27b-it-SimPO-37K",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "AALF/gemma-2-27b-it-SimPO-37K"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24065257959990605
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3911343917952534
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01283987915407855
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3487604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1971409574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/AELLM_gemma-2-aeria-infinity-9b/93d08946-76b5-4547-8bf0-966c5cccd8c1.json b/leaderboard_data/HFOpenLLMv2/google/AELLM_gemma-2-aeria-infinity-9b/93d08946-76b5-4547-8bf0-966c5cccd8c1.json
deleted file mode 100644
index 64916bbd45694586ead2b5936d8fe5172e2e9149..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/AELLM_gemma-2-aeria-infinity-9b/93d08946-76b5-4547-8bf0-966c5cccd8c1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AELLM_gemma-2-aeria-infinity-9b/1762652579.4729412",
- "retrieved_timestamp": "1762652579.472942",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AELLM/gemma-2-aeria-infinity-9b",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "AELLM/gemma-2-aeria-infinity-9b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.759399504426034
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5983336669577649
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21450151057401812
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3338926174496644
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40196875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38622007978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/AELLM_gemma-2-lyco-infinity-9b/fa16a47e-4009-487b-8252-1fef155ce6b4.json b/leaderboard_data/HFOpenLLMv2/google/AELLM_gemma-2-lyco-infinity-9b/fa16a47e-4009-487b-8252-1fef155ce6b4.json
deleted file mode 100644
index 6a00de2f8127765c9ccb3b46d99012738fcd7601..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/AELLM_gemma-2-lyco-infinity-9b/fa16a47e-4009-487b-8252-1fef155ce6b4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/AELLM_gemma-2-lyco-infinity-9b/1762652579.473207",
- "retrieved_timestamp": "1762652579.473208",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "AELLM/gemma-2-lyco-infinity-9b",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "AELLM/gemma-2-lyco-infinity-9b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7316475839660989
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5839534871023703
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17069486404833836
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40063541666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.378656914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/Aashraf995_Gemma-Evo-10B/15b910c7-6c36-4af8-af78-d48278dbc4db.json b/leaderboard_data/HFOpenLLMv2/google/Aashraf995_Gemma-Evo-10B/15b910c7-6c36-4af8-af78-d48278dbc4db.json
deleted file mode 100644
index ee316bac2aa616ed2f71bc17734d7a3e101c0b62..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/Aashraf995_Gemma-Evo-10B/15b910c7-6c36-4af8-af78-d48278dbc4db.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Aashraf995_Gemma-Evo-10B/1762652579.476305",
- "retrieved_timestamp": "1762652579.476305",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Aashraf995/Gemma-Evo-10B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "Aashraf995/Gemma-Evo-10B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7332211864519476
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6044352897552882
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22280966767371602
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3540268456375839
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45947916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4275265957446808
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/BAAI_Gemma2-9B-IT-Simpo-Infinity-Preference/0f948238-5ed2-41ee-a815-3ff20728de89.json b/leaderboard_data/HFOpenLLMv2/google/BAAI_Gemma2-9B-IT-Simpo-Infinity-Preference/0f948238-5ed2-41ee-a815-3ff20728de89.json
deleted file mode 100644
index 4e6900f5ff45d35187e7485dc5897e8599cebe42..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/BAAI_Gemma2-9B-IT-Simpo-Infinity-Preference/0f948238-5ed2-41ee-a815-3ff20728de89.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BAAI_Gemma2-9B-IT-Simpo-Infinity-Preference/1762652579.487571",
- "retrieved_timestamp": "1762652579.487571",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "BAAI/Gemma2-9B-IT-Simpo-Infinity-Preference"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31763831079314
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5979459664230056
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09743202416918428
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33976510067114096
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39657291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3868849734042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/BlackBeenie_Neos-Gemma-2-9b/ea9ebbaa-fb04-491d-adc2-0389cb5d1ef6.json b/leaderboard_data/HFOpenLLMv2/google/BlackBeenie_Neos-Gemma-2-9b/ea9ebbaa-fb04-491d-adc2-0389cb5d1ef6.json
deleted file mode 100644
index 155351adf699881a50894e56817bf4d24132e5c4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/BlackBeenie_Neos-Gemma-2-9b/ea9ebbaa-fb04-491d-adc2-0389cb5d1ef6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/BlackBeenie_Neos-Gemma-2-9b/1762652579.4958751",
- "retrieved_timestamp": "1762652579.495876",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "BlackBeenie/Neos-Gemma-2-9b",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "BlackBeenie/Neos-Gemma-2-9b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5875665456544192
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5502975126048852
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09818731117824774
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32298657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36175
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39810505319148937
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/Columbia-NLP_LION-Gemma-2b-odpo-v1.0/25418041-6fe1-4cd8-88cb-79456a65210c.json b/leaderboard_data/HFOpenLLMv2/google/Columbia-NLP_LION-Gemma-2b-odpo-v1.0/25418041-6fe1-4cd8-88cb-79456a65210c.json
deleted file mode 100644
index 7caf7c97ccbda5bb361dbed14c1f0efa1f8c25a0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/Columbia-NLP_LION-Gemma-2b-odpo-v1.0/25418041-6fe1-4cd8-88cb-79456a65210c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Columbia-NLP_LION-Gemma-2b-odpo-v1.0/1762652579.507273",
- "retrieved_timestamp": "1762652579.507273",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Columbia-NLP/LION-Gemma-2b-odpo-v1.0",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "Columbia-NLP/LION-Gemma-2b-odpo-v1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30664858131978706
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3895836210706875
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06948640483383686
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2424496644295302
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42791666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1692154255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 2.506
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-9B/a639bba5-4d0e-4d0b-826a-3eb4d0ccebab.json b/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-9B/a639bba5-4d0e-4d0b-826a-3eb4d0ccebab.json
deleted file mode 100644
index 6327cb5efb667284be1fdc72c6174cd0453ae704..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-9B/a639bba5-4d0e-4d0b-826a-3eb4d0ccebab.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-9B/1762652579.539702",
- "retrieved_timestamp": "1762652579.5397062",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavidAU/Gemma-The-Writer-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "DavidAU/Gemma-The-Writer-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17403156956874427
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5905439384199537
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08761329305135952
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34563758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.409875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39793882978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-DEADLINE-10B/66d2e2a4-a75c-4fb9-af6a-3181f17281af.json b/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-DEADLINE-10B/66d2e2a4-a75c-4fb9-af6a-3181f17281af.json
deleted file mode 100644
index 483a78277385b8cce8072ab1faf599cd881154bc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-DEADLINE-10B/66d2e2a4-a75c-4fb9-af6a-3181f17281af.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-DEADLINE-10B/1762652579.5400288",
- "retrieved_timestamp": "1762652579.54003",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavidAU/Gemma-The-Writer-DEADLINE-10B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "DavidAU/Gemma-The-Writer-DEADLINE-10B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23315802071836061
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5896087932535433
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09894259818731117
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3422818791946309
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4188645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39461436170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.952
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-J.GutenBerg-10B/3d1cef14-ea09-45ca-a92c-a1fe7a05ce8b.json b/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-J.GutenBerg-10B/3d1cef14-ea09-45ca-a92c-a1fe7a05ce8b.json
deleted file mode 100644
index 339312494e1ebd21c7e17246b68d63239393b1ae..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-J.GutenBerg-10B/3d1cef14-ea09-45ca-a92c-a1fe7a05ce8b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-J.GutenBerg-10B/1762652579.5402539",
- "retrieved_timestamp": "1762652579.540255",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavidAU/Gemma-The-Writer-J.GutenBerg-10B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "DavidAU/Gemma-The-Writer-J.GutenBerg-10B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28578948301617485
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5909421265868766
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09214501510574018
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41759375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3946974734042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.034
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-Mighty-Sword-9B/a403d91c-4f30-4d05-9f00-24ce97cc91ac.json b/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-Mighty-Sword-9B/a403d91c-4f30-4d05-9f00-24ce97cc91ac.json
deleted file mode 100644
index 8d03495e92476363d567bd844b15484966c025dd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-Mighty-Sword-9B/a403d91c-4f30-4d05-9f00-24ce97cc91ac.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-Mighty-Sword-9B/1762652579.540473",
- "retrieved_timestamp": "1762652579.5404742",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavidAU/Gemma-The-Writer-Mighty-Sword-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "DavidAU/Gemma-The-Writer-Mighty-Sword-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7527549125209998
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5911959785635329
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19108761329305135
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34815436241610737
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4111770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39677526595744683
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/b708a2a6-d738-48a9-9c20-0838bdb19646.json b/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/b708a2a6-d738-48a9-9c20-0838bdb19646.json
deleted file mode 100644
index 8e81e365a7e90cab5aa4cfcf6bbbf3c885d2eb76..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/DavidAU_Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/b708a2a6-d738-48a9-9c20-0838bdb19646.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/DavidAU_Gemma-The-Writer-N-Restless-Quill-10B-Uncensored/1762652579.540709",
- "retrieved_timestamp": "1762652579.54071",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "DavidAU/Gemma-The-Writer-N-Restless-Quill-10B-Uncensored"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7070927361622716
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5922294775018883
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.229607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3414429530201342
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41632291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3966090425531915
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.034
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/EpistemeAI2_Athene-codegemma-2-7b-it-alpaca-v1.2/ea4bffba-6e14-4380-a060-2b4deb6d94c0.json b/leaderboard_data/HFOpenLLMv2/google/EpistemeAI2_Athene-codegemma-2-7b-it-alpaca-v1.2/ea4bffba-6e14-4380-a060-2b4deb6d94c0.json
deleted file mode 100644
index a9d11bdcc26f3055618ffac6b7ffd9b2fda744b9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/EpistemeAI2_Athene-codegemma-2-7b-it-alpaca-v1.2/ea4bffba-6e14-4380-a060-2b4deb6d94c0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI2_Athene-codegemma-2-7b-it-alpaca-v1.2/1762652579.609552",
- "retrieved_timestamp": "1762652579.6095529",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "EpistemeAI2/Athene-codegemma-2-7b-it-alpaca-v1.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4351177098986245
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41754154460978427
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04229607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41696875000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22972074468085107
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 7.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/EpistemeAI_Athena-gemma-2-2b-it-Philos/21096485-ff49-4481-a530-48746334fceb.json b/leaderboard_data/HFOpenLLMv2/google/EpistemeAI_Athena-gemma-2-2b-it-Philos/21096485-ff49-4481-a530-48746334fceb.json
deleted file mode 100644
index 0a91c9bdac86ea9ce5346520d14c0d401ed04b60..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/EpistemeAI_Athena-gemma-2-2b-it-Philos/21096485-ff49-4481-a530-48746334fceb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Athena-gemma-2-2b-it-Philos/1762652579.598697",
- "retrieved_timestamp": "1762652579.598698",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Athena-gemma-2-2b-it-Philos",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Athena-gemma-2-2b-it-Philos"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4620950189940469
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37947768790586744
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03700906344410876
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43136458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22481715425531915
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/EpistemeAI_Athena-gemma-2-2b-it/a0ca047c-97c2-4ba1-84a7-ba0b00ba6d25.json b/leaderboard_data/HFOpenLLMv2/google/EpistemeAI_Athena-gemma-2-2b-it/a0ca047c-97c2-4ba1-84a7-ba0b00ba6d25.json
deleted file mode 100644
index d97646627f4881a09af4c4517b6f9f58d14f94f5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/EpistemeAI_Athena-gemma-2-2b-it/a0ca047c-97c2-4ba1-84a7-ba0b00ba6d25.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Athena-gemma-2-2b-it/1762652579.598221",
- "retrieved_timestamp": "1762652579.598221",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Athena-gemma-2-2b-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Athena-gemma-2-2b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3134172883504657
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42642293591146
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04909365558912387
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43505208333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2421875
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/EpistemeAI_Athene-codegemma-2-7b-it-alpaca-v1.3/c05e106e-203a-49e7-b656-22809ac16037.json b/leaderboard_data/HFOpenLLMv2/google/EpistemeAI_Athene-codegemma-2-7b-it-alpaca-v1.3/c05e106e-203a-49e7-b656-22809ac16037.json
deleted file mode 100644
index a4bc5815456b8e27fee16e71bbc5cc7574b21270..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/EpistemeAI_Athene-codegemma-2-7b-it-alpaca-v1.3/c05e106e-203a-49e7-b656-22809ac16037.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/EpistemeAI_Athene-codegemma-2-7b-it-alpaca-v1.3/1762652579.598942",
- "retrieved_timestamp": "1762652579.598943",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "EpistemeAI/Athene-codegemma-2-7b-it-alpaca-v1.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40299405577201824
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4331916189482215
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.061933534743202415
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2802013422818792
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4503020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25872672872340424
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 7.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/GenVRadmin_AryaBhatta-GemmaOrca-2-Merged/d4bb122a-87b4-482e-8050-7c1716a4ed5b.json b/leaderboard_data/HFOpenLLMv2/google/GenVRadmin_AryaBhatta-GemmaOrca-2-Merged/d4bb122a-87b4-482e-8050-7c1716a4ed5b.json
deleted file mode 100644
index 0cb75fba0f0655203f71a937c8dd1272af918f8f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/GenVRadmin_AryaBhatta-GemmaOrca-2-Merged/d4bb122a-87b4-482e-8050-7c1716a4ed5b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/GenVRadmin_AryaBhatta-GemmaOrca-2-Merged/1762652579.627253",
- "retrieved_timestamp": "1762652579.627253",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "GenVRadmin/AryaBhatta-GemmaOrca-2-Merged",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "GenVRadmin/AryaBhatta-GemmaOrca-2-Merged"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30637375497014585
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3887493166323577
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04984894259818731
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4550208333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23844747340425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 8.538
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/GenVRadmin_AryaBhatta-GemmaOrca-Merged/179d4baf-7da1-4a56-82e7-35ea45204e13.json b/leaderboard_data/HFOpenLLMv2/google/GenVRadmin_AryaBhatta-GemmaOrca-Merged/179d4baf-7da1-4a56-82e7-35ea45204e13.json
deleted file mode 100644
index d687f2a3eafd8c3db8253d6d1b4670387ce95ac5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/GenVRadmin_AryaBhatta-GemmaOrca-Merged/179d4baf-7da1-4a56-82e7-35ea45204e13.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/GenVRadmin_AryaBhatta-GemmaOrca-Merged/1762652579.627504",
- "retrieved_timestamp": "1762652579.6275048",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "GenVRadmin/AryaBhatta-GemmaOrca-Merged",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "GenVRadmin/AryaBhatta-GemmaOrca-Merged"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30637375497014585
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4130633897394575
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2558724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3523854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22282247340425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 8.538
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/GenVRadmin_AryaBhatta-GemmaUltra-Merged/4aca90c3-b0c0-4ec6-ba6b-0d5b09ef63fe.json b/leaderboard_data/HFOpenLLMv2/google/GenVRadmin_AryaBhatta-GemmaUltra-Merged/4aca90c3-b0c0-4ec6-ba6b-0d5b09ef63fe.json
deleted file mode 100644
index 4b4b3b1af72816428340a68c0518631138b8baa9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/GenVRadmin_AryaBhatta-GemmaUltra-Merged/4aca90c3-b0c0-4ec6-ba6b-0d5b09ef63fe.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/GenVRadmin_AryaBhatta-GemmaUltra-Merged/1762652579.627715",
- "retrieved_timestamp": "1762652579.627716",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "GenVRadmin/AryaBhatta-GemmaUltra-Merged",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "GenVRadmin/AryaBhatta-GemmaUltra-Merged"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30207737691547315
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4141445378464817
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05362537764350453
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25335570469798663
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42785416666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2265625
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 8.538
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/Gunulhona_Gemma-Ko-Merge-PEFT/7891a95c-8d95-4181-96e8-cdc2f6ab538b.json b/leaderboard_data/HFOpenLLMv2/google/Gunulhona_Gemma-Ko-Merge-PEFT/7891a95c-8d95-4181-96e8-cdc2f6ab538b.json
deleted file mode 100644
index 75c9626babc284f0feb2056be985c15b43c8ca30..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/Gunulhona_Gemma-Ko-Merge-PEFT/7891a95c-8d95-4181-96e8-cdc2f6ab538b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Gunulhona_Gemma-Ko-Merge-PEFT/1762652579.635783",
- "retrieved_timestamp": "1762652579.635786",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Gunulhona/Gemma-Ko-Merge-PEFT",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "Gunulhona/Gemma-Ko-Merge-PEFT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4441348954108433
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4862989687822461
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3985833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3097573138297872
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "?",
- "params_billions": 20.318
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/Gunulhona_Gemma-Ko-Merge-PEFT/f9fb4008-db4e-4a84-b12b-050bdf35084f.json b/leaderboard_data/HFOpenLLMv2/google/Gunulhona_Gemma-Ko-Merge-PEFT/f9fb4008-db4e-4a84-b12b-050bdf35084f.json
deleted file mode 100644
index d11cecad0c2923d067dcb5107fe66d5aed95b01e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/Gunulhona_Gemma-Ko-Merge-PEFT/f9fb4008-db4e-4a84-b12b-050bdf35084f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Gunulhona_Gemma-Ko-Merge-PEFT/1762652579.635457",
- "retrieved_timestamp": "1762652579.635457",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Gunulhona/Gemma-Ko-Merge-PEFT",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "Gunulhona/Gemma-Ko-Merge-PEFT"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28803906966847964
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5154093999781059
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40801041666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38173204787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "?",
- "params_billions": 20.318
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/Gunulhona_Gemma-Ko-Merge/dccf426d-63bb-4298-958f-d1f4776f03b2.json b/leaderboard_data/HFOpenLLMv2/google/Gunulhona_Gemma-Ko-Merge/dccf426d-63bb-4298-958f-d1f4776f03b2.json
deleted file mode 100644
index f2c993c0e0d2c5869316d481a1e2a32a2ec110c6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/Gunulhona_Gemma-Ko-Merge/dccf426d-63bb-4298-958f-d1f4776f03b2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Gunulhona_Gemma-Ko-Merge/1762652579.635146",
- "retrieved_timestamp": "1762652579.635147",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Gunulhona/Gemma-Ko-Merge",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "Gunulhona/Gemma-Ko-Merge"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6415721397004392
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5813027258981727
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18806646525679757
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33557046979865773
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40469791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3878823138297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/HuggingFaceH4_zephyr-7b-gemma-v0.1/dcf4d2bb-ee8f-4083-baf6-8870731515fa.json b/leaderboard_data/HFOpenLLMv2/google/HuggingFaceH4_zephyr-7b-gemma-v0.1/dcf4d2bb-ee8f-4083-baf6-8870731515fa.json
deleted file mode 100644
index a7f0ce45a526a5c173cbd4e12b78f490656b0d49..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/HuggingFaceH4_zephyr-7b-gemma-v0.1/dcf4d2bb-ee8f-4083-baf6-8870731515fa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/HuggingFaceH4_zephyr-7b-gemma-v0.1/1762652579.641236",
- "retrieved_timestamp": "1762652579.641237",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "HuggingFaceH4/zephyr-7b-gemma-v0.1",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "HuggingFaceH4/zephyr-7b-gemma-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3363741539116212
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4623735014679749
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08157099697885196
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37396874999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2847406914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 8.538
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/INSAIT-Institute_BgGPT-Gemma-2-27B-IT-v1.0/51d4db96-4c38-464a-9e7f-0ade67699c8d.json b/leaderboard_data/HFOpenLLMv2/google/INSAIT-Institute_BgGPT-Gemma-2-27B-IT-v1.0/51d4db96-4c38-464a-9e7f-0ade67699c8d.json
deleted file mode 100644
index 788ac4539f06e8ec705eb938ec5739f239253de9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/INSAIT-Institute_BgGPT-Gemma-2-27B-IT-v1.0/51d4db96-4c38-464a-9e7f-0ade67699c8d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/INSAIT-Institute_BgGPT-Gemma-2-27B-IT-v1.0/1762652579.645844",
- "retrieved_timestamp": "1762652579.645845",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "INSAIT-Institute/BgGPT-Gemma-2-27B-IT-v1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911778102988436
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35753125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11668882978723404
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/IlyaGusev_gemma-2-2b-it-abliterated/e3ee4f00-1037-4da7-96e2-934b5ccefd15.json b/leaderboard_data/HFOpenLLMv2/google/IlyaGusev_gemma-2-2b-it-abliterated/e3ee4f00-1037-4da7-96e2-934b5ccefd15.json
deleted file mode 100644
index c0fe2e3ab347befe9ecfe07f0fb7e8d8453dc83a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/IlyaGusev_gemma-2-2b-it-abliterated/e3ee4f00-1037-4da7-96e2-934b5ccefd15.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/IlyaGusev_gemma-2-2b-it-abliterated/1762652579.646105",
- "retrieved_timestamp": "1762652579.646106",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "IlyaGusev/gemma-2-2b-it-abliterated",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "IlyaGusev/gemma-2-2b-it-abliterated"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.533086654521115
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4118601326211988
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06117824773413897
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37818749999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25382313829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/IlyaGusev_gemma-2-9b-it-abliterated/8a81c9e6-1c72-46f6-98c6-0d3b28ba5633.json b/leaderboard_data/HFOpenLLMv2/google/IlyaGusev_gemma-2-9b-it-abliterated/8a81c9e6-1c72-46f6-98c6-0d3b28ba5633.json
deleted file mode 100644
index 1f924626597d933a5000f07456860b7224b2cdc0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/IlyaGusev_gemma-2-9b-it-abliterated/8a81c9e6-1c72-46f6-98c6-0d3b28ba5633.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/IlyaGusev_gemma-2-9b-it-abliterated/1762652579.646349",
- "retrieved_timestamp": "1762652579.6463501",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "IlyaGusev/gemma-2-9b-it-abliterated",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "IlyaGusev/gemma-2-9b-it-abliterated"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.747259493698941
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.59063299776093
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17749244712990936
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34563758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4033645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39153922872340424
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/LenguajeNaturalAI_leniachat-gemma-2b-v0/af954640-6806-4e4c-9c0b-b81215eadfc8.json b/leaderboard_data/HFOpenLLMv2/google/LenguajeNaturalAI_leniachat-gemma-2b-v0/af954640-6806-4e4c-9c0b-b81215eadfc8.json
deleted file mode 100644
index 55638238d2105ebf6f4c6bb57457247637de6c75..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/LenguajeNaturalAI_leniachat-gemma-2b-v0/af954640-6806-4e4c-9c0b-b81215eadfc8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/LenguajeNaturalAI_leniachat-gemma-2b-v0/1762652579.7101068",
- "retrieved_timestamp": "1762652579.7101078",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "LenguajeNaturalAI/leniachat-gemma-2b-v0",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "LenguajeNaturalAI/leniachat-gemma-2b-v0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21497404664069114
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30740211895412034
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.011329305135951661
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36590625000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11702127659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 2.506
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/ModelSpace_GemmaX2-28-9B-v0.1/6cb560eb-08f5-4430-8797-1116f1d2f56c.json b/leaderboard_data/HFOpenLLMv2/google/ModelSpace_GemmaX2-28-9B-v0.1/6cb560eb-08f5-4430-8797-1116f1d2f56c.json
deleted file mode 100644
index 2450df08c50c775933facec73040904b349cdaa1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/ModelSpace_GemmaX2-28-9B-v0.1/6cb560eb-08f5-4430-8797-1116f1d2f56c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ModelSpace_GemmaX2-28-9B-v0.1/1762652579.76179",
- "retrieved_timestamp": "1762652579.761791",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ModelSpace/GemmaX2-28-9B-v0.1",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "ModelSpace/GemmaX2-28-9B-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.003921816336210145
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3687226427280163
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.027190332326283987
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27684563758389263
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35365625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2230718085106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/NAPS-ai_naps-gemma-2-27b-v-0.1.0/8768f068-452f-4a54-bddb-9f6cffaf5a19.json b/leaderboard_data/HFOpenLLMv2/google/NAPS-ai_naps-gemma-2-27b-v-0.1.0/8768f068-452f-4a54-bddb-9f6cffaf5a19.json
deleted file mode 100644
index 33ab1875915786d8ae9fe8b889eaadca3e827eac..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/NAPS-ai_naps-gemma-2-27b-v-0.1.0/8768f068-452f-4a54-bddb-9f6cffaf5a19.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-gemma-2-27b-v-0.1.0/1762652579.7653928",
- "retrieved_timestamp": "1762652579.765394",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NAPS-ai/naps-gemma-2-27b-v-0.1.0",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "NAPS-ai/naps-gemma-2-27b-v-0.1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911778102988436
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35753125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11677194148936171
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/NAPS-ai_naps-gemma-2-27b-v0.1.0/b004d154-392d-4f31-afbb-547b058996bd.json b/leaderboard_data/HFOpenLLMv2/google/NAPS-ai_naps-gemma-2-27b-v0.1.0/b004d154-392d-4f31-afbb-547b058996bd.json
deleted file mode 100644
index 33ae17d7ec6123b6f59fc32d9c37611faff1eb84..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/NAPS-ai_naps-gemma-2-27b-v0.1.0/b004d154-392d-4f31-afbb-547b058996bd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/NAPS-ai_naps-gemma-2-27b-v0.1.0/1762652579.765648",
- "retrieved_timestamp": "1762652579.7656488",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "NAPS-ai/naps-gemma-2-27b-v0.1.0",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "NAPS-ai/naps-gemma-2-27b-v0.1.0"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911778102988436
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35753125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11677194148936171
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/SaisExperiments_Gemma-2-2B-Stheno-Filtered/16070acb-e8bb-476a-b5aa-863a85cb0aee.json b/leaderboard_data/HFOpenLLMv2/google/SaisExperiments_Gemma-2-2B-Stheno-Filtered/16070acb-e8bb-476a-b5aa-863a85cb0aee.json
deleted file mode 100644
index 548808c804b6030de4b1d83e3e7f868dde1b2b33..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/SaisExperiments_Gemma-2-2B-Stheno-Filtered/16070acb-e8bb-476a-b5aa-863a85cb0aee.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/SaisExperiments_Gemma-2-2B-Stheno-Filtered/1762652579.855671",
- "retrieved_timestamp": "1762652579.8556721",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "SaisExperiments/Gemma-2-2B-Stheno-Filtered",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "SaisExperiments/Gemma-2-2B-Stheno-Filtered"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4196554032190144
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4149234152222183
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04607250755287009
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40029166666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2629654255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/Skywork_Skywork-Reward-Gemma-2-27B-v0.2/140b0661-2961-46f3-8c75-cb75147e0acc.json b/leaderboard_data/HFOpenLLMv2/google/Skywork_Skywork-Reward-Gemma-2-27B-v0.2/140b0661-2961-46f3-8c75-cb75147e0acc.json
deleted file mode 100644
index 6e0fa4ae54967fb0a1ad5211a7b0d466689906b8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/Skywork_Skywork-Reward-Gemma-2-27B-v0.2/140b0661-2961-46f3-8c75-cb75147e0acc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Skywork_Skywork-Reward-Gemma-2-27B-v0.2/1762652579.8884969",
- "retrieved_timestamp": "1762652579.8884978",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Skywork/Skywork-Reward-Gemma-2-27B-v0.2",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "Skywork/Skywork-Reward-Gemma-2-27B-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7807317916461656
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.635960062329604
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22734138972809667
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34395973154362414
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42314583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4103224734042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForSequenceClassification",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/Sorawiz_Gemma-9B-Base/246e4c1f-016c-411e-870e-9ade63713daa.json b/leaderboard_data/HFOpenLLMv2/google/Sorawiz_Gemma-9B-Base/246e4c1f-016c-411e-870e-9ade63713daa.json
deleted file mode 100644
index 0acd66faaf2ab7f7588dd83aa8cad09a1cbc8215..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/Sorawiz_Gemma-9B-Base/246e4c1f-016c-411e-870e-9ade63713daa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sorawiz_Gemma-9B-Base/1762652579.8897338",
- "retrieved_timestamp": "1762652579.889735",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sorawiz/Gemma-9B-Base",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "Sorawiz/Gemma-9B-Base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16673758959560633
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.593040577894583
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09818731117824774
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33976510067114096
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40451041666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42353723404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/Sorawiz_Gemma-Creative-9B-Base/26229a4f-9f53-453f-9899-77808040f8cb.json b/leaderboard_data/HFOpenLLMv2/google/Sorawiz_Gemma-Creative-9B-Base/26229a4f-9f53-453f-9899-77808040f8cb.json
deleted file mode 100644
index 27be9020e1b145eb7a9b973c58bd55b3e23fa589..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/Sorawiz_Gemma-Creative-9B-Base/26229a4f-9f53-453f-9899-77808040f8cb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Sorawiz_Gemma-Creative-9B-Base/1762652579.890075",
- "retrieved_timestamp": "1762652579.890076",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Sorawiz/Gemma-Creative-9B-Base",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "Sorawiz/Gemma-Creative-9B-Base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1515002415812267
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5458614335095562
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07779456193353475
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3296979865771812
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.401875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4007646276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/Supichi_BBAI_135_Gemma/64cd00af-6782-431b-aac1-445e39d56717.json b/leaderboard_data/HFOpenLLMv2/google/Supichi_BBAI_135_Gemma/64cd00af-6782-431b-aac1-445e39d56717.json
deleted file mode 100644
index 17ef9004a03966a53b4cc640fd08e98e96b9791f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/Supichi_BBAI_135_Gemma/64cd00af-6782-431b-aac1-445e39d56717.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Supichi_BBAI_135_Gemma/1762652579.8946822",
- "retrieved_timestamp": "1762652579.894683",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Supichi/BBAI_135_Gemma",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "Supichi/BBAI_135_Gemma"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06562144000141845
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35684129093449685
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38047916666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16722074468085107
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 19.3
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Gemmasutra-9B-v1/3f7a68f4-e456-4ecf-8a5f-1f3698822a89.json b/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Gemmasutra-9B-v1/3f7a68f4-e456-4ecf-8a5f-1f3698822a89.json
deleted file mode 100644
index 1340a4252d63decb5994a28af0bc1a1eb4ee98ae..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Gemmasutra-9B-v1/3f7a68f4-e456-4ecf-8a5f-1f3698822a89.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheDrummer_Gemmasutra-9B-v1/1762652579.9140742",
- "retrieved_timestamp": "1762652579.914075",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheDrummer/Gemmasutra-9B-v1",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "TheDrummer/Gemmasutra-9B-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24155130609006326
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5886914248369671
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48459375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4045046542553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Gemmasutra-Mini-2B-v1/3c066bd3-ec6c-412d-86a1-759c228610b9.json b/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Gemmasutra-Mini-2B-v1/3c066bd3-ec6c-412d-86a1-759c228610b9.json
deleted file mode 100644
index ae84f36302a412f264c1ace0e70685fe829c2c57..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Gemmasutra-Mini-2B-v1/3c066bd3-ec6c-412d-86a1-759c228610b9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheDrummer_Gemmasutra-Mini-2B-v1/1762652579.914318",
- "retrieved_timestamp": "1762652579.914319",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheDrummer/Gemmasutra-Mini-2B-v1",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "TheDrummer/Gemmasutra-Mini-2B-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25486597782771936
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35750190791471836
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0377643504531722
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2709731543624161
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3489791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20545212765957446
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Tiger-Gemma-9B-v1/7b093f59-7a4e-4e72-b9a6-7d10870917ea.json b/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Tiger-Gemma-9B-v1/7b093f59-7a4e-4e72-b9a6-7d10870917ea.json
deleted file mode 100644
index 0bcb644f9a5d78d6523999605260420a4770e1fd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Tiger-Gemma-9B-v1/7b093f59-7a4e-4e72-b9a6-7d10870917ea.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheDrummer_Tiger-Gemma-9B-v1/1762652579.915312",
- "retrieved_timestamp": "1762652579.915313",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheDrummer/Tiger-Gemma-9B-v1",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "TheDrummer/Tiger-Gemma-9B-v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.728150197032762
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5703687739329574
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18353474320241692
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3389261744966443
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41616666666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41181848404255317
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Tiger-Gemma-9B-v2/962205b9-009a-4201-b382-5143c80e78ce.json b/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Tiger-Gemma-9B-v2/962205b9-009a-4201-b382-5143c80e78ce.json
deleted file mode 100644
index f376d5dd70bd42eba8d2ac6f432a7106b117b6a4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Tiger-Gemma-9B-v2/962205b9-009a-4201-b382-5143c80e78ce.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheDrummer_Tiger-Gemma-9B-v2/1762652579.915529",
- "retrieved_timestamp": "1762652579.91553",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheDrummer/Tiger-Gemma-9B-v2",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "TheDrummer/Tiger-Gemma-9B-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6985997154217476
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5617191114121779
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18202416918429004
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33976510067114096
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40841666666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41123670212765956
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Tiger-Gemma-9B-v3/6fbfd3ba-e28a-4e9d-be12-e04b6d50b9ee.json b/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Tiger-Gemma-9B-v3/6fbfd3ba-e28a-4e9d-be12-e04b6d50b9ee.json
deleted file mode 100644
index 5822da3931f847e3932d33634a878cad29e7b149..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/TheDrummer_Tiger-Gemma-9B-v3/6fbfd3ba-e28a-4e9d-be12-e04b6d50b9ee.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/TheDrummer_Tiger-Gemma-9B-v3/1762652579.915734",
- "retrieved_timestamp": "1762652579.915734",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "TheDrummer/Tiger-Gemma-9B-v3",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "TheDrummer/Tiger-Gemma-9B-v3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6820635912711606
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5812231557853248
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1623867069486405
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3389261744966443
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4003541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40591755319148937
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/Triangle104_Gemmadevi-Stock-10B/153fd43a-fe54-4a99-98dd-5420f2bf8b66.json b/leaderboard_data/HFOpenLLMv2/google/Triangle104_Gemmadevi-Stock-10B/153fd43a-fe54-4a99-98dd-5420f2bf8b66.json
deleted file mode 100644
index cb7771fb2f5543fbf7453f461bd58549a447b6c1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/Triangle104_Gemmadevi-Stock-10B/153fd43a-fe54-4a99-98dd-5420f2bf8b66.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Triangle104_Gemmadevi-Stock-10B/1762652579.9249291",
- "retrieved_timestamp": "1762652579.9249291",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Triangle104/Gemmadevi-Stock-10B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "Triangle104/Gemmadevi-Stock-10B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15819470117067158
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6065922684184144
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09667673716012085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35318791946308725
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46211458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4261968085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter1/687769ed-44e9-4f3d-aee6-2dc4e98dd7ee.json b/leaderboard_data/HFOpenLLMv2/google/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter1/687769ed-44e9-4f3d-aee6-2dc4e98dd7ee.json
deleted file mode 100644
index e1fd6426e67ce31fd849d294719f386c07afe3fc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter1/687769ed-44e9-4f3d-aee6-2dc4e98dd7ee.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter1/1762652579.936019",
- "retrieved_timestamp": "1762652579.93602",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.308221075634871
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5968934762705508
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08987915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33640939597315433
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4099375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39070811170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter2/fa584f01-69eb-4ecc-9f0d-049b6bfb05c8.json b/leaderboard_data/HFOpenLLMv2/google/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter2/fa584f01-69eb-4ecc-9f0d-049b6bfb05c8.json
deleted file mode 100644
index 9444e2da916649cd8781824675707d851550d3ed..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter2/fa584f01-69eb-4ecc-9f0d-049b6bfb05c8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter2/1762652579.936279",
- "retrieved_timestamp": "1762652579.93628",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3100196367859502
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5989880877421281
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08081570996978851
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3347315436241611
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4139375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.386968085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter3/f318d457-d295-4447-9222-0b0d92708b5d.json b/leaderboard_data/HFOpenLLMv2/google/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter3/f318d457-d295-4447-9222-0b0d92708b5d.json
deleted file mode 100644
index 658860b0110513b164b7b2487c2a1cbb90b76610..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter3/f318d457-d295-4447-9222-0b0d92708b5d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/UCLA-AGI_Gemma-2-9B-It-SPPO-Iter3/1762652579.9364889",
- "retrieved_timestamp": "1762652579.93649",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31671409637539505
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6007080229268026
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07099697885196375
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3389261744966443
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41660416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.382563164893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-Gemma-2b/b002a274-9b4f-40ad-b0c7-e4efabbe431f.json b/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-Gemma-2b/b002a274-9b4f-40ad-b0c7-e4efabbe431f.json
deleted file mode 100644
index 328d48174f4ac968e1ded0421e5b7a609af782d2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-Gemma-2b/b002a274-9b4f-40ad-b0c7-e4efabbe431f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Gemma-2b/1762652579.941349",
- "retrieved_timestamp": "1762652579.94135",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "VAGOsolutions/SauerkrautLM-Gemma-2b",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "VAGOsolutions/SauerkrautLM-Gemma-2b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24752213017017072
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3416315376053174
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.027945619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25671140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3675833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14685837765957446
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 2.506
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-Gemma-7b/e66f4326-2585-4581-b45f-d9a81fb1576c.json b/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-Gemma-7b/e66f4326-2585-4581-b45f-d9a81fb1576c.json
deleted file mode 100644
index f8d978b0a0d6548c7cfa610a3d7d93307c664048..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-Gemma-7b/e66f4326-2585-4581-b45f-d9a81fb1576c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-Gemma-7b/1762652579.9415941",
- "retrieved_timestamp": "1762652579.9415948",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "VAGOsolutions/SauerkrautLM-Gemma-7b",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "VAGOsolutions/SauerkrautLM-Gemma-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3406705319662939
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41879127895858687
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06722054380664652
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2860738255033557
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35942708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961269946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 8.538
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-gemma-2-2b-it/b010858c-edb5-4e49-b5b6-72b06943ab2c.json b/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-gemma-2-2b-it/b010858c-edb5-4e49-b5b6-72b06943ab2c.json
deleted file mode 100644
index be8967233ad1495cd1dcdcca7dc864840829e62c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-gemma-2-2b-it/b010858c-edb5-4e49-b5b6-72b06943ab2c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-gemma-2-2b-it/1762652579.9427688",
- "retrieved_timestamp": "1762652579.94277",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "VAGOsolutions/SauerkrautLM-gemma-2-2b-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "VAGOsolutions/SauerkrautLM-gemma-2-2b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13206625088099574
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42408371860644856
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02190332326283988
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3994583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.269281914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-gemma-2-9b-it/5395cbac-afe0-4936-b4eb-f554fcb5be75.json b/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-gemma-2-9b-it/5395cbac-afe0-4936-b4eb-f554fcb5be75.json
deleted file mode 100644
index b182fc0eaa87f0224ad56a81d01b67e9c494ac35..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/VAGOsolutions_SauerkrautLM-gemma-2-9b-it/5395cbac-afe0-4936-b4eb-f554fcb5be75.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/VAGOsolutions_SauerkrautLM-gemma-2-9b-it/1762652579.94298",
- "retrieved_timestamp": "1762652579.942981",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "VAGOsolutions/SauerkrautLM-gemma-2-9b-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "VAGOsolutions/SauerkrautLM-gemma-2-9b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3024009627787604
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6072645787154746
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08383685800604229
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3271812080536913
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43182291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40907579787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/Youlln_4PRYMMAL-GEMMA2-9B-SLERP/06b75d54-4d17-4116-a4d5-0917eedb2dc4.json b/leaderboard_data/HFOpenLLMv2/google/Youlln_4PRYMMAL-GEMMA2-9B-SLERP/06b75d54-4d17-4116-a4d5-0917eedb2dc4.json
deleted file mode 100644
index 3e84cf7881b4e16ae561d1214a1b32e2b613f863..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/Youlln_4PRYMMAL-GEMMA2-9B-SLERP/06b75d54-4d17-4116-a4d5-0917eedb2dc4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/Youlln_4PRYMMAL-GEMMA2-9B-SLERP/1762652579.961175",
- "retrieved_timestamp": "1762652579.9611762",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "Youlln/4PRYMMAL-GEMMA2-9B-SLERP",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "Youlln/4PRYMMAL-GEMMA2-9B-SLERP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2713766140507188
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5922529923998928
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09063444108761329
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46719791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42096077127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/ZHLiu627_zephyr-7b-gemma-rpo-avg/6333359d-1cf7-4905-9a48-f8a8f7b46ed2.json b/leaderboard_data/HFOpenLLMv2/google/ZHLiu627_zephyr-7b-gemma-rpo-avg/6333359d-1cf7-4905-9a48-f8a8f7b46ed2.json
deleted file mode 100644
index 5af0f8c5a56d96fd0fb0934cc242cbab69e9256d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/ZHLiu627_zephyr-7b-gemma-rpo-avg/6333359d-1cf7-4905-9a48-f8a8f7b46ed2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ZHLiu627_zephyr-7b-gemma-rpo-avg/1762652579.9660559",
- "retrieved_timestamp": "1762652579.966057",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ZHLiu627/zephyr-7b-gemma-rpo-avg",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "ZHLiu627/zephyr-7b-gemma-rpo-avg"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30060350979844586
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41832761356743015
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04984894259818731
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27684563758389263
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40810416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2830784574468085
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 8.538
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/agentlans_Gemma2-9B-AdvancedFuse/3bcdf1ca-ad29-45cf-ac97-6bc508981545.json b/leaderboard_data/HFOpenLLMv2/google/agentlans_Gemma2-9B-AdvancedFuse/3bcdf1ca-ad29-45cf-ac97-6bc508981545.json
deleted file mode 100644
index 4b1006c544785c34c85c743063047bbaaad8897c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/agentlans_Gemma2-9B-AdvancedFuse/3bcdf1ca-ad29-45cf-ac97-6bc508981545.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/agentlans_Gemma2-9B-AdvancedFuse/1762652579.975734",
- "retrieved_timestamp": "1762652579.975735",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "agentlans/Gemma2-9B-AdvancedFuse",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "agentlans/Gemma2-9B-AdvancedFuse"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15427288483446144
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.585936684475517
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10045317220543806
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3347315436241611
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4230833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4000166223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp1-2.6B/e52ac657-26a3-499a-949f-bf2a0b620d8e.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp1-2.6B/e52ac657-26a3-499a-949f-bf2a0b620d8e.json
deleted file mode 100644
index 6419fe7b765590a839081b8a1c4b2d864fd332c2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp1-2.6B/e52ac657-26a3-499a-949f-bf2a0b620d8e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp1-2.6B/1762652579.985875",
- "retrieved_timestamp": "1762652579.985876",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Gemma2Slerp1-2.6B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "allknowingroger/Gemma2Slerp1-2.6B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5354348683714766
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4343094462630086
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10649546827794562
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45616666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26886635638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp1-27B/42d79295-bdb0-411d-b1b0-5cff954e925c.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp1-27B/42d79295-bdb0-411d-b1b0-5cff954e925c.json
deleted file mode 100644
index 2418f5dead3335d02ca4f815e8b807570befe565..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp1-27B/42d79295-bdb0-411d-b1b0-5cff954e925c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp1-27B/1762652579.986121",
- "retrieved_timestamp": "1762652579.986122",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Gemma2Slerp1-27B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "allknowingroger/Gemma2Slerp1-27B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7186332265056716
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6398902146527521
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2583081570996979
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3640939597315436
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47671875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44564494680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp2-2.6B/eeb46285-0c8d-43b7-9b6d-e86c24064fde.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp2-2.6B/eeb46285-0c8d-43b7-9b6d-e86c24064fde.json
deleted file mode 100644
index b3c302fb0bdf13effaca96ea338602ad6c43d29b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp2-2.6B/eeb46285-0c8d-43b7-9b6d-e86c24064fde.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp2-2.6B/1762652579.98633",
- "retrieved_timestamp": "1762652579.98633",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Gemma2Slerp2-2.6B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "allknowingroger/Gemma2Slerp2-2.6B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5747272791748117
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4307646783089521
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09063444108761329
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44677083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26961436170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp2-27B/1f2c33e8-2d7b-4bd5-81e8-1c9bcae0ae8f.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp2-27B/1f2c33e8-2d7b-4bd5-81e8-1c9bcae0ae8f.json
deleted file mode 100644
index 1ba58c357fe7fa4c882f7916eb9e83f1df8cf0c0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp2-27B/1f2c33e8-2d7b-4bd5-81e8-1c9bcae0ae8f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp2-27B/1762652579.986531",
- "retrieved_timestamp": "1762652579.9865322",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Gemma2Slerp2-27B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "allknowingroger/Gemma2Slerp2-27B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7545534736720789
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6557274121032689
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27870090634441086
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3699664429530201
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46208333333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46226728723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp3-27B/648810d4-4dd5-48c7-a4d7-b3d9d2f3f3f2.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp3-27B/648810d4-4dd5-48c7-a4d7-b3d9d2f3f3f2.json
deleted file mode 100644
index b71b1c59948163ef6886d0215db9cbdb1b741c63..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp3-27B/648810d4-4dd5-48c7-a4d7-b3d9d2f3f3f2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp3-27B/1762652579.986752",
- "retrieved_timestamp": "1762652579.986753",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Gemma2Slerp3-27B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "allknowingroger/Gemma2Slerp3-27B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7426384216102164
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6499638721230724
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27416918429003023
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3548657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47402083333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4640957446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp4-27B/f94f3bf1-cf85-4673-a5cf-368f250233e4.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp4-27B/f94f3bf1-cf85-4673-a5cf-368f250233e4.json
deleted file mode 100644
index 0489aac8e03f95058bbd622ebca6df01b3260cd9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_Gemma2Slerp4-27B/f94f3bf1-cf85-4673-a5cf-368f250233e4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_Gemma2Slerp4-27B/1762652579.986965",
- "retrieved_timestamp": "1762652579.9869661",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/Gemma2Slerp4-27B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "allknowingroger/Gemma2Slerp4-27B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7496575752337131
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6529581339749019
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2719033232628399
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36661073825503354
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4502395833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46492686170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp-9B/3aed9fd2-45bd-4568-8885-7fc2370bb26d.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp-9B/3aed9fd2-45bd-4568-8885-7fc2370bb26d.json
deleted file mode 100644
index 3870f37d2f9b1147bff65a50b4d8d8287aa1a2b3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp-9B/3aed9fd2-45bd-4568-8885-7fc2370bb26d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp-9B/1762652579.987181",
- "retrieved_timestamp": "1762652579.9871821",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/GemmaSlerp-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "allknowingroger/GemmaSlerp-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.704320092909037
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.592057786577488
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21601208459214502
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34395973154362414
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46732291666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41605718085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp2-9B/99333370-c7d5-4763-b3a4-14adde0fab9e.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp2-9B/99333370-c7d5-4763-b3a4-14adde0fab9e.json
deleted file mode 100644
index b1fecde55af05fd2567b8f5d170a18a7f8005b93..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp2-9B/99333370-c7d5-4763-b3a4-14adde0fab9e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp2-9B/1762652579.987394",
- "retrieved_timestamp": "1762652579.987395",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/GemmaSlerp2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "allknowingroger/GemmaSlerp2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7281003293483512
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.598271299766216
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2107250755287009
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3523489932885906
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47671875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42386968085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp4-10B/32e38c82-d412-4888-9d9d-f89aef0989fd.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp4-10B/32e38c82-d412-4888-9d9d-f89aef0989fd.json
deleted file mode 100644
index 17b69a9ea799db2064ec23a7e9c1a6c12fde7f6b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp4-10B/32e38c82-d412-4888-9d9d-f89aef0989fd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp4-10B/1762652579.9875991",
- "retrieved_timestamp": "1762652579.9875998",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/GemmaSlerp4-10B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "allknowingroger/GemmaSlerp4-10B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7326216660682544
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6027862253440982
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2243202416918429
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35318791946308725
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45398958333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4250332446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp5-10B/e325b56f-4306-4e37-adc5-c09b300a8c30.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp5-10B/e325b56f-4306-4e37-adc5-c09b300a8c30.json
deleted file mode 100644
index 4ab0552c7f767ef0f7484dfae1cc33b1efaa9a0c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaSlerp5-10B/e325b56f-4306-4e37-adc5-c09b300a8c30.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaSlerp5-10B/1762652579.9878101",
- "retrieved_timestamp": "1762652579.987811",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/GemmaSlerp5-10B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "allknowingroger/GemmaSlerp5-10B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7353444416370785
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.605447654436423
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21827794561933533
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3523489932885906
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46078125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4328457446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaStock1-27B/0b19d8bb-1952-4515-8d29-e55e1106e92b.json b/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaStock1-27B/0b19d8bb-1952-4515-8d29-e55e1106e92b.json
deleted file mode 100644
index f4e64aa817dcd5224c865e99d93c6cccc461af81..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/allknowingroger_GemmaStock1-27B/0b19d8bb-1952-4515-8d29-e55e1106e92b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/allknowingroger_GemmaStock1-27B/1762652579.9880252",
- "retrieved_timestamp": "1762652579.9880252",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "allknowingroger/GemmaStock1-27B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "allknowingroger/GemmaStock1-27B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7509064836855099
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6565607454366021
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.263595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3640939597315436
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45268749999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47298869680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/anakin87_gemma-2b-orpo/80531a18-00d3-4264-bf84-cd1d4d90df08.json b/leaderboard_data/HFOpenLLMv2/google/anakin87_gemma-2b-orpo/80531a18-00d3-4264-bf84-cd1d4d90df08.json
deleted file mode 100644
index 57ed13a5478a892aaab4c3da4a05a24917869978..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/anakin87_gemma-2b-orpo/80531a18-00d3-4264-bf84-cd1d4d90df08.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/anakin87_gemma-2b-orpo/1762652580.010973",
- "retrieved_timestamp": "1762652580.010974",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "anakin87/gemma-2b-orpo",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "anakin87/gemma-2b-orpo"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24779695651981187
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34261709435617754
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0188821752265861
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37276041666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1305684840425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 2.506
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/anthracite-org_magnum-v3-9b-customgemma2/865b86aa-7b8d-4619-aa57-3c57cc4c7b51.json b/leaderboard_data/HFOpenLLMv2/google/anthracite-org_magnum-v3-9b-customgemma2/865b86aa-7b8d-4619-aa57-3c57cc4c7b51.json
deleted file mode 100644
index 13953df147fcc05db141dd159e39ca486e06bfff..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/anthracite-org_magnum-v3-9b-customgemma2/865b86aa-7b8d-4619-aa57-3c57cc4c7b51.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/anthracite-org_magnum-v3-9b-customgemma2/1762652580.012768",
- "retrieved_timestamp": "1762652580.012769",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "anthracite-org/magnum-v3-9b-customgemma2",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "anthracite-org/magnum-v3-9b-customgemma2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1272955757390391
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5340136936916174
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07175226586102719
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3288590604026846
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45646875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4204621010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/beomi_gemma-mling-7b/2568a2b7-e95c-4224-9850-5816466b50f2.json b/leaderboard_data/HFOpenLLMv2/google/beomi_gemma-mling-7b/2568a2b7-e95c-4224-9850-5816466b50f2.json
deleted file mode 100644
index c9f2ce1b66b9b7ff25ebd4112911ea4eefc8ec62..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/beomi_gemma-mling-7b/2568a2b7-e95c-4224-9850-5816466b50f2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/beomi_gemma-mling-7b/1762652580.030431",
- "retrieved_timestamp": "1762652580.030431",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "beomi/gemma-mling-7b",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "beomi/gemma-mling-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20290939152559653
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40675941947154004
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.054380664652567974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37585416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2632978723404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 8.538
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/bunnycore_Gemma-2-2B-Smart/ebada07f-e700-4f38-aec0-f801959969e6.json b/leaderboard_data/HFOpenLLMv2/google/bunnycore_Gemma-2-2B-Smart/ebada07f-e700-4f38-aec0-f801959969e6.json
deleted file mode 100644
index 4fecca52892330602d448043e7e784f4b3164e5d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/bunnycore_Gemma-2-2B-Smart/ebada07f-e700-4f38-aec0-f801959969e6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Gemma-2-2B-Smart/1762652580.044707",
- "retrieved_timestamp": "1762652580.044708",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Gemma-2-2B-Smart",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "bunnycore/Gemma-2-2B-Smart"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13206625088099574
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39742674570492836
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03323262839879154
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2827181208053691
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4248541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2426030585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/bunnycore_Gemma2-9B-TitanFusion/95a2d032-e2a4-46df-84d2-6b7529d5bb01.json b/leaderboard_data/HFOpenLLMv2/google/bunnycore_Gemma2-9B-TitanFusion/95a2d032-e2a4-46df-84d2-6b7529d5bb01.json
deleted file mode 100644
index 129ecba1a4eaf5694b866d7e397c889f10177381..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/bunnycore_Gemma2-9B-TitanFusion/95a2d032-e2a4-46df-84d2-6b7529d5bb01.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/bunnycore_Gemma2-9B-TitanFusion/1762652580.044988",
- "retrieved_timestamp": "1762652580.0449889",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "bunnycore/Gemma2-9B-TitanFusion",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "bunnycore/Gemma2-9B-TitanFusion"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16184169115724056
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5712026020785131
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0770392749244713
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33221476510067116
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41362499999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39602726063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/cat-searcher_gemma-2-9b-it-sppo-iter-1-evol-1/af7a7129-1b6a-4ff5-952f-075ae4f7c137.json b/leaderboard_data/HFOpenLLMv2/google/cat-searcher_gemma-2-9b-it-sppo-iter-1-evol-1/af7a7129-1b6a-4ff5-952f-075ae4f7c137.json
deleted file mode 100644
index 3bbd8aa8645fd8a122875e5c027bea557297247b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/cat-searcher_gemma-2-9b-it-sppo-iter-1-evol-1/af7a7129-1b6a-4ff5-952f-075ae4f7c137.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/cat-searcher_gemma-2-9b-it-sppo-iter-1-evol-1/1762652580.099224",
- "retrieved_timestamp": "1762652580.099225",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "cat-searcher/gemma-2-9b-it-sppo-iter-1-evol-1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2941827683878775
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5939369622672414
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08534743202416918
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34060402684563756
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39257291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37998670212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/cat-searcher_gemma-2-9b-it-sppo-iter-1/3c33f6b0-dc40-4a61-bbbe-063b9d8d30e3.json b/leaderboard_data/HFOpenLLMv2/google/cat-searcher_gemma-2-9b-it-sppo-iter-1/3c33f6b0-dc40-4a61-bbbe-063b9d8d30e3.json
deleted file mode 100644
index 2e214dea82aa24bf49c94289f08fc85750f77769..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/cat-searcher_gemma-2-9b-it-sppo-iter-1/3c33f6b0-dc40-4a61-bbbe-063b9d8d30e3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/cat-searcher_gemma-2-9b-it-sppo-iter-1/1762652580.091131",
- "retrieved_timestamp": "1762652580.091137",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "cat-searcher/gemma-2-9b-it-sppo-iter-1",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "cat-searcher/gemma-2-9b-it-sppo-iter-1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30147674836101546
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5971867698707507
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3447986577181208
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39266666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38538896276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/cognitivecomputations_dolphin-2.9.4-gemma2-2b/29a10f53-dd38-437b-a7f3-9756035df640.json b/leaderboard_data/HFOpenLLMv2/google/cognitivecomputations_dolphin-2.9.4-gemma2-2b/29a10f53-dd38-437b-a7f3-9756035df640.json
deleted file mode 100644
index cfe44da244de5fd1b56d294c7a0a355c37eb16f1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/cognitivecomputations_dolphin-2.9.4-gemma2-2b/29a10f53-dd38-437b-a7f3-9756035df640.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/cognitivecomputations_dolphin-2.9.4-gemma2-2b/1762652580.115823",
- "retrieved_timestamp": "1762652580.115823",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "cognitivecomputations/dolphin-2.9.4-gemma2-2b",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "cognitivecomputations/dolphin-2.9.4-gemma2-2b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08955127949396491
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40813187411055213
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04909365558912387
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41796875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2105219414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/djuna_Gemma-2-gemmama-9b/b2f24392-29aa-4a24-b489-87ea9b85daea.json b/leaderboard_data/HFOpenLLMv2/google/djuna_Gemma-2-gemmama-9b/b2f24392-29aa-4a24-b489-87ea9b85daea.json
deleted file mode 100644
index aba38c4451e4db5a839c2654591ee9b143206be6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/djuna_Gemma-2-gemmama-9b/b2f24392-29aa-4a24-b489-87ea9b85daea.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/djuna_Gemma-2-gemmama-9b/1762652580.12782",
- "retrieved_timestamp": "1762652580.127821",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "djuna/Gemma-2-gemmama-9b",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "djuna/Gemma-2-gemmama-9b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7703404743857409
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5420037856495951
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19259818731117825
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33557046979865773
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4031458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3109208776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/dwikitheduck_gemma-2-2b-id-inst/6d66b056-c83d-49b8-ac84-04396c0d97df.json b/leaderboard_data/HFOpenLLMv2/google/dwikitheduck_gemma-2-2b-id-inst/6d66b056-c83d-49b8-ac84-04396c0d97df.json
deleted file mode 100644
index 8dd0d9b812ea29eb44f410b318248dab7a84e854..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/dwikitheduck_gemma-2-2b-id-inst/6d66b056-c83d-49b8-ac84-04396c0d97df.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dwikitheduck_gemma-2-2b-id-inst/1762652580.137194",
- "retrieved_timestamp": "1762652580.137195",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dwikitheduck/gemma-2-2b-id-inst",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "dwikitheduck/gemma-2-2b-id-inst"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38785644312646006
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39621721241423097
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.045317220543806644
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41542708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21733710106382978
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/dwikitheduck_gemma-2-2b-id/000b7f0b-9e2f-499a-9bab-b08767efb8ca.json b/leaderboard_data/HFOpenLLMv2/google/dwikitheduck_gemma-2-2b-id/000b7f0b-9e2f-499a-9bab-b08767efb8ca.json
deleted file mode 100644
index 318d55a33af1a617bdd97a4891ba3a0ee3e2d5d1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/dwikitheduck_gemma-2-2b-id/000b7f0b-9e2f-499a-9bab-b08767efb8ca.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/dwikitheduck_gemma-2-2b-id/1762652580.136933",
- "retrieved_timestamp": "1762652580.136933",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "dwikitheduck/gemma-2-2b-id",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "dwikitheduck/gemma-2-2b-id"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38785644312646006
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39621721241423097
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.045317220543806644
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41542708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21733710106382978
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/ehristoforu_Gemma2-9B-it-psy10k-mental_health/25c93024-ce65-49d5-96da-00107bb37f77.json b/leaderboard_data/HFOpenLLMv2/google/ehristoforu_Gemma2-9B-it-psy10k-mental_health/25c93024-ce65-49d5-96da-00107bb37f77.json
deleted file mode 100644
index f0b08fea4baccbaf7c40534659bc36a7ecccec2f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/ehristoforu_Gemma2-9B-it-psy10k-mental_health/25c93024-ce65-49d5-96da-00107bb37f77.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_Gemma2-9B-it-psy10k-mental_health/1762652580.139083",
- "retrieved_timestamp": "1762652580.139084",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/Gemma2-9B-it-psy10k-mental_health",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "ehristoforu/Gemma2-9B-it-psy10k-mental_health"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5886658510529839
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5539376944027642
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16314199395770393
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.337248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40860416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38289561170212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/ehristoforu_Gemma2-9b-it-train6/e289e629-17dd-440e-8839-d5dcbe535fd6.json b/leaderboard_data/HFOpenLLMv2/google/ehristoforu_Gemma2-9b-it-train6/e289e629-17dd-440e-8839-d5dcbe535fd6.json
deleted file mode 100644
index f85136e91c1c6fa5331de705f30ee576848d6e7c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/ehristoforu_Gemma2-9b-it-train6/e289e629-17dd-440e-8839-d5dcbe535fd6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ehristoforu_Gemma2-9b-it-train6/1762652580.1393359",
- "retrieved_timestamp": "1762652580.139337",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ehristoforu/Gemma2-9b-it-train6",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "ehristoforu/Gemma2-9b-it-train6"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7025215317579578
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5898092579133603
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19108761329305135
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3288590604026846
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40841666666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39419880319148937
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/ell44ot_gemma-2b-def/9ba31c7b-13df-46f2-a164-1729563707e1.json b/leaderboard_data/HFOpenLLMv2/google/ell44ot_gemma-2b-def/9ba31c7b-13df-46f2-a164-1729563707e1.json
deleted file mode 100644
index 052cbf5e48eededa34e9617593af858232c25e48..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/ell44ot_gemma-2b-def/9ba31c7b-13df-46f2-a164-1729563707e1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ell44ot_gemma-2b-def/1762652580.147274",
- "retrieved_timestamp": "1762652580.147275",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ell44ot/gemma-2b-def",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "ell44ot/gemma-2b-def"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26930433472076315
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31586532094752634
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02416918429003021
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36702083333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15724734042553193
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GemmaModel",
- "params_billions": 1.546
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_codegemma-1.1-2b/9d92e421-c458-4ad3-b9bf-45c0ca1b90cf.json b/leaderboard_data/HFOpenLLMv2/google/google_codegemma-1.1-2b/9d92e421-c458-4ad3-b9bf-45c0ca1b90cf.json
deleted file mode 100644
index c113455c95ea872f7cbcf7b48193ba42219a5511..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_codegemma-1.1-2b/9d92e421-c458-4ad3-b9bf-45c0ca1b90cf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_codegemma-1.1-2b/1762652580.172607",
- "retrieved_timestamp": "1762652580.172608",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/codegemma-1.1-2b",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/codegemma-1.1-2b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22936253584932426
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3353417790248454
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01283987915407855
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2651006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3871458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1278257978723404
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 2.506
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-base/69eb63bf-72dd-4995-a8ec-49fd304a8ee7.json b/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-base/69eb63bf-72dd-4995-a8ec-49fd304a8ee7.json
deleted file mode 100644
index 2abcd5be26f59618dce3ee3096ad3be791fd2130..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-base/69eb63bf-72dd-4995-a8ec-49fd304a8ee7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_flan-t5-base/1762652580.172907",
- "retrieved_timestamp": "1762652580.172908",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/flan-t5-base",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/flan-t5-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18907055501624578
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3525980599300322
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.010574018126888218
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23825503355704697
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36711458333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13572140957446807
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "T5ForConditionalGeneration",
- "params_billions": 0.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-large/eb2e1202-9292-4f5e-a366-abc84897c66d.json b/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-large/eb2e1202-9292-4f5e-a366-abc84897c66d.json
deleted file mode 100644
index a2ef83dd40f7d0595b036f5c66f6a54b564bbdf2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-large/eb2e1202-9292-4f5e-a366-abc84897c66d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_flan-t5-large/1762652580.173132",
- "retrieved_timestamp": "1762652580.1731331",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/flan-t5-large",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/flan-t5-large"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22009490374428736
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41531150356794316
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.014350453172205438
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25083892617449666
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40832291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17087765957446807
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "T5ForConditionalGeneration",
- "params_billions": 0.783
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-small/368a36c5-8211-4240-ac88-3fd5e5414310.json b/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-small/368a36c5-8211-4240-ac88-3fd5e5414310.json
deleted file mode 100644
index 1158cb78acf4d9d6f96ce8db75456dbcf5d4cdef..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-small/368a36c5-8211-4240-ac88-3fd5e5414310.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_flan-t5-small/1762652580.173366",
- "retrieved_timestamp": "1762652580.173366",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/flan-t5-small",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/flan-t5-small"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1524255641697363
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3282901097640842
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0075528700906344415
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41229166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1233377659574468
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "T5ForConditionalGeneration",
- "params_billions": 0.077
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-xl/98a6a294-7b5d-4279-8aa6-6ed16248ce0b.json b/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-xl/98a6a294-7b5d-4279-8aa6-6ed16248ce0b.json
deleted file mode 100644
index 27e3e4c1542c604ba3d58f8c19892de5deb9e5f1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-xl/98a6a294-7b5d-4279-8aa6-6ed16248ce0b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_flan-t5-xl/1762652580.1738272",
- "retrieved_timestamp": "1762652580.1738281",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/flan-t5-xl",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/flan-t5-xl"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2206944241279804
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45372172155693963
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0007552870090634442
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24580536912751677
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42203125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21417885638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "T5ForConditionalGeneration",
- "params_billions": 2.85
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-xl/ab0ac321-1c2b-4523-b48c-de47ff06e7a3.json b/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-xl/ab0ac321-1c2b-4523-b48c-de47ff06e7a3.json
deleted file mode 100644
index caaad62bc31a2a7fbfc61ad911ae8fd8e7878d6c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-xl/ab0ac321-1c2b-4523-b48c-de47ff06e7a3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_flan-t5-xl/1762652580.173602",
- "retrieved_timestamp": "1762652580.173603",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/flan-t5-xl",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/flan-t5-xl"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22374189373085634
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45310636062112314
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0075528700906344415
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2525167785234899
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41809375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21467752659574468
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "T5ForConditionalGeneration",
- "params_billions": 2.85
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-xxl/e15f4783-510e-4b92-a999-072caa425d4c.json b/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-xxl/e15f4783-510e-4b92-a999-072caa425d4c.json
deleted file mode 100644
index 2476c4752a184ff97482e71b496f90a91d09cafa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_flan-t5-xxl/e15f4783-510e-4b92-a999-072caa425d4c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_flan-t5-xxl/1762652580.174026",
- "retrieved_timestamp": "1762652580.174026",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/flan-t5-xxl",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/flan-t5-xxl"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2200450360598767
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5065888015776924
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.010574018126888218
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42175
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23429188829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "T5ForConditionalGeneration",
- "params_billions": 11.267
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_flan-ul2/99941572-3e23-467c-97df-dfe1a2aa9805.json b/leaderboard_data/HFOpenLLMv2/google/google_flan-ul2/99941572-3e23-467c-97df-dfe1a2aa9805.json
deleted file mode 100644
index 3323a8057555485144bb26ba7ba709179daddab5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_flan-ul2/99941572-3e23-467c-97df-dfe1a2aa9805.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_flan-ul2/1762652580.174251",
- "retrieved_timestamp": "1762652580.174251",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/flan-ul2",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/flan-ul2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23925406809487715
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5053738049125648
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.00906344410876133
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3843541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24933510638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "T5ForConditionalGeneration",
- "params_billions": 19.46
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-1.1-2b-it/5ed676b6-4aff-4d71-a91a-6d5d9feeb28f.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-1.1-2b-it/5ed676b6-4aff-4d71-a91a-6d5d9feeb28f.json
deleted file mode 100644
index dc3f1c19d05f60e3f51c47a33b2d7ffaa51fc76b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-1.1-2b-it/5ed676b6-4aff-4d71-a91a-6d5d9feeb28f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_gemma-1.1-2b-it/1762652580.1745641",
- "retrieved_timestamp": "1762652580.174565",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/gemma-1.1-2b-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/gemma-1.1-2b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30674831668860847
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3184634974814922
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01812688821752266
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33939583333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14835438829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 2.506
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-1.1-7b-it/6929c338-76a5-4386-9fa8-68e35a989a86.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-1.1-7b-it/6929c338-76a5-4386-9fa8-68e35a989a86.json
deleted file mode 100644
index 90fa3ab088f41f684fa38928df55d490f9e5b2f6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-1.1-7b-it/6929c338-76a5-4386-9fa8-68e35a989a86.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_gemma-1.1-7b-it/1762652580.1748302",
- "retrieved_timestamp": "1762652580.1748302",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/gemma-1.1-7b-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/gemma-1.1-7b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5039107346285633
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3935297962833251
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04909365558912387
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42302083333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2583942819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 8.538
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-27b-it/5bcf96ce-efd1-4f90-91a1-edd548de71ad.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-27b-it/5bcf96ce-efd1-4f90-91a1-edd548de71ad.json
deleted file mode 100644
index f4a1a2921ff0d51fdc6dcd6d90050a20adc91545..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-27b-it/5bcf96ce-efd1-4f90-91a1-edd548de71ad.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_gemma-2-27b-it/1762652580.17537",
- "retrieved_timestamp": "1762652580.175371",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/gemma-2-27b-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/gemma-2-27b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7977677008116243
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6451387433168799
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23867069486404835
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.375
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40330208333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4451462765957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-27b/12f7d5a6-3f8b-49d8-9ca8-38774dbcca92.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-27b/12f7d5a6-3f8b-49d8-9ca8-38774dbcca92.json
deleted file mode 100644
index 59f8f5644a42216bc4b12c5ef5f0cb1aacb32e17..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-27b/12f7d5a6-3f8b-49d8-9ca8-38774dbcca92.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_gemma-2-27b/1762652580.175144",
- "retrieved_timestamp": "1762652580.175145",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/gemma-2-27b",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/gemma-2-27b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24752213017017072
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5642908317482057
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1661631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35067114093959734
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43963541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4370844414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b-it/64daa9ea-cf1e-4787-90cf-ed72c5e23afd.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b-it/64daa9ea-cf1e-4787-90cf-ed72c5e23afd.json
deleted file mode 100644
index c20168b194fb8939305662925db9c5544027297d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b-it/64daa9ea-cf1e-4787-90cf-ed72c5e23afd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_gemma-2-2b-it/1762652580.176172",
- "retrieved_timestamp": "1762652580.176194",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/gemma-2-2b-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/gemma-2-2b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5668337788179807
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41992308914274706
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0007552870090634441
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39288541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25498670212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "InternLM2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b-jpn-it/251b93fa-6f12-41bc-85c8-ded52e1a0d2d.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b-jpn-it/251b93fa-6f12-41bc-85c8-ded52e1a0d2d.json
deleted file mode 100644
index 26f68cc2e61bb49683c5f4e3389fdf740003414f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b-jpn-it/251b93fa-6f12-41bc-85c8-ded52e1a0d2d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_gemma-2-2b-jpn-it/1762652580.1767948",
- "retrieved_timestamp": "1762652580.176796",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/gemma-2-2b-jpn-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/gemma-2-2b-jpn-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5288401441508531
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4178440226217119
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04758308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37276041666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2466755319148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b-jpn-it/a09fdbce-489c-4d14-a05f-7663121bece7.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b-jpn-it/a09fdbce-489c-4d14-a05f-7663121bece7.json
deleted file mode 100644
index de5c8169fdb1de2dc2c0995c623212bb227fa8b9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b-jpn-it/a09fdbce-489c-4d14-a05f-7663121bece7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_gemma-2-2b-jpn-it/1762652580.176506",
- "retrieved_timestamp": "1762652580.176507",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/gemma-2-2b-jpn-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/gemma-2-2b-jpn-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5077826832803628
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42255698900658106
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03474320241691843
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39638541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2578125
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b/07e74f27-e0c3-448f-9a8c-a07ff8a73178.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b/07e74f27-e0c3-448f-9a8c-a07ff8a73178.json
deleted file mode 100644
index cf64654349e6196e5ef0d75a1b4cd96c0050d72d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b/07e74f27-e0c3-448f-9a8c-a07ff8a73178.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_gemma-2-2b/1762652580.175597",
- "retrieved_timestamp": "1762652580.1755981",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/gemma-2-2b",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/gemma-2-2b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19931226922343825
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3655966996422591
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.028700906344410877
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4231770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21800199468085107
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "InternLM2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b/53fb75b1-2d9f-4af3-a358-18bf5d4a9032.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b/53fb75b1-2d9f-4af3-a358-18bf5d4a9032.json
deleted file mode 100644
index e9df8fe5573ac5432d86afa4e10c5d43ce8064c7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-2b/53fb75b1-2d9f-4af3-a358-18bf5d4a9032.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_gemma-2-2b/1762652580.1759539",
- "retrieved_timestamp": "1762652580.175955",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/gemma-2-2b",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/gemma-2-2b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20176021844262113
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3708674612470255
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.030211480362537766
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.421875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22165890957446807
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "InternLM2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-9b-it/e8cef406-d6cc-48bd-872f-3d5b74bcf092.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-9b-it/e8cef406-d6cc-48bd-872f-3d5b74bcf092.json
deleted file mode 100644
index 9a14c1bcdf976d8afd9504ee0f1d4d1174669f8f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-9b-it/e8cef406-d6cc-48bd-872f-3d5b74bcf092.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_gemma-2-9b-it/1762652580.177257",
- "retrieved_timestamp": "1762652580.177258",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/gemma-2-9b-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/gemma-2-9b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7435626360279614
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5990342504164132
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19486404833836857
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36073825503355705
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4072708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3875498670212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-9b/2ac50111-a850-4bd2-8136-c373990742a5.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-9b/2ac50111-a850-4bd2-8136-c373990742a5.json
deleted file mode 100644
index a1995120574f0cdad7a8b58fb5dcc9275be2fd32..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2-9b/2ac50111-a850-4bd2-8136-c373990742a5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_gemma-2-9b/1762652580.177011",
- "retrieved_timestamp": "1762652580.177012",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/gemma-2-9b",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/gemma-2-9b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20398320899657355
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5377373397621884
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13444108761329304
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3288590604026846
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4461145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4103224734042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2b-it/50dffd1a-ddf5-40fd-a2c8-e5dd140af617.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2b-it/50dffd1a-ddf5-40fd-a2c8-e5dd140af617.json
deleted file mode 100644
index 6e49827d85f6c3517e86a5115e43d937a989da10..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2b-it/50dffd1a-ddf5-40fd-a2c8-e5dd140af617.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_gemma-2b-it/1762652580.17777",
- "retrieved_timestamp": "1762652580.17777",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/gemma-2b-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/gemma-2b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26902950837112194
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31508191988788464
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02039274924471299
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.334125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13530585106382978
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 2.506
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2b/2dd86ebc-0253-4801-ac99-2bb3494ad29b.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-2b/2dd86ebc-0253-4801-ac99-2bb3494ad29b.json
deleted file mode 100644
index 17adafbabf30d1eaf784ee71bfc1e2ea135748b9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-2b/2dd86ebc-0253-4801-ac99-2bb3494ad29b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_gemma-2b/1762652580.177512",
- "retrieved_timestamp": "1762652580.177513",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/gemma-2b",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/gemma-2b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20375825033134307
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33656381705857935
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.030211480362537766
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2550335570469799
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39778125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13655252659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 2.506
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-7b-it/30146048-ee0f-431d-b3e7-8c066c820740.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-7b-it/30146048-ee0f-431d-b3e7-8c066c820740.json
deleted file mode 100644
index e18ad115d9c410f3546711ceaa8e58904002a7e1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-7b-it/30146048-ee0f-431d-b3e7-8c066c820740.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_gemma-7b-it/1762652580.178242",
- "retrieved_timestamp": "1762652580.1782432",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/gemma-7b-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/gemma-7b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3868324933398937
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36459012743300967
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02945619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42742708333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16946476063829788
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 8.538
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_gemma-7b/630e3cc0-fccc-41b3-b439-85a875dae401.json b/leaderboard_data/HFOpenLLMv2/google/google_gemma-7b/630e3cc0-fccc-41b3-b439-85a875dae401.json
deleted file mode 100644
index 7b466c7d23986a94a4e8bf4849f5595f9008ee12..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_gemma-7b/630e3cc0-fccc-41b3-b439-85a875dae401.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_gemma-7b/1762652580.1780128",
- "retrieved_timestamp": "1762652580.178014",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/gemma-7b",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/gemma-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2659321710838353
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43615285239286355
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07401812688821752
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4062395833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2947972074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 8.538
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_mt5-base/621fb00c-90a0-4295-9bd6-f5e102bc0bab.json b/leaderboard_data/HFOpenLLMv2/google/google_mt5-base/621fb00c-90a0-4295-9bd6-f5e102bc0bab.json
deleted file mode 100644
index 7a1044fc2409bd14ad62069d9d613c587619737c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_mt5-base/621fb00c-90a0-4295-9bd6-f5e102bc0bab.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_mt5-base/1762652580.178463",
- "retrieved_timestamp": "1762652580.178463",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/mt5-base",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/mt5-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1645157072124186
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28831600228488835
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.00906344410876133
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23909395973154363
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36720833333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10696476063829788
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MT5ForConditionalGeneration",
- "params_billions": 0.39
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_mt5-small/0d958c7c-5cd9-459f-a0e9-235b5d41ae53.json b/leaderboard_data/HFOpenLLMv2/google/google_mt5-small/0d958c7c-5cd9-459f-a0e9-235b5d41ae53.json
deleted file mode 100644
index 0bf4ff92aadcb0cb95f835ed32a950288c6bed59..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_mt5-small/0d958c7c-5cd9-459f-a0e9-235b5d41ae53.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_mt5-small/1762652580.1787279",
- "retrieved_timestamp": "1762652580.178729",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/mt5-small",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/mt5-small"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17180968718555653
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2765842029929075
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2424496644295302
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38575
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11228390957446809
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MT5ForConditionalGeneration",
- "params_billions": 0.17
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_mt5-xl/5abb3ce9-6ad4-4dfa-8bca-81ec6cb84426.json b/leaderboard_data/HFOpenLLMv2/google/google_mt5-xl/5abb3ce9-6ad4-4dfa-8bca-81ec6cb84426.json
deleted file mode 100644
index d9ac4c2e5b0ff5961f8a3480e5b004b7276bdabc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_mt5-xl/5abb3ce9-6ad4-4dfa-8bca-81ec6cb84426.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_mt5-xl/1762652580.17897",
- "retrieved_timestamp": "1762652580.1789708",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/mt5-xl",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/mt5-xl"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19596448534333347
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.304735837080435
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3795208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11195146276595745
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MT5ForConditionalGeneration",
- "params_billions": 3.23
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_mt5-xxl/38520cce-b3b6-4f22-a6a8-313f6181f5ea.json b/leaderboard_data/HFOpenLLMv2/google/google_mt5-xxl/38520cce-b3b6-4f22-a6a8-313f6181f5ea.json
deleted file mode 100644
index ddbda16ffb7f6121aa4f53dbbde5ecb70f51b7de..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_mt5-xxl/38520cce-b3b6-4f22-a6a8-313f6181f5ea.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_mt5-xxl/1762652580.1791801",
- "retrieved_timestamp": "1762652580.1791801",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/mt5-xxl",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/mt5-xxl"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23575668116154028
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2959344159116905
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24161073825503357
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36894791666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10887632978723404
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "T5ForConditionalGeneration",
- "params_billions": 11.9
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-2b-it/a219b160-3dbd-4dcd-b39d-d12c6f9b1145.json b/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-2b-it/a219b160-3dbd-4dcd-b39d-d12c6f9b1145.json
deleted file mode 100644
index 78e1bce35d6df076b161596d0e6e5c0e43fbff4d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-2b-it/a219b160-3dbd-4dcd-b39d-d12c6f9b1145.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_recurrentgemma-2b-it/1762652580.17961",
- "retrieved_timestamp": "1762652580.179611",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/recurrentgemma-2b-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/recurrentgemma-2b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2949329999955673
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33300047272606553
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.019637462235649546
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2533557046979866
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3340625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1402094414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "RecurrentGemmaForCausalLM",
- "params_billions": 2.683
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-2b/218a5d0f-5242-43c4-8166-81f5c09626bb.json b/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-2b/218a5d0f-5242-43c4-8166-81f5c09626bb.json
deleted file mode 100644
index c1103dbb40d4b035331422ce6fec37101dff32f4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-2b/218a5d0f-5242-43c4-8166-81f5c09626bb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_recurrentgemma-2b/1762652580.179393",
- "retrieved_timestamp": "1762652580.179394",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/recurrentgemma-2b",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/recurrentgemma-2b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3017028151970106
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31973582830084474
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02039274924471299
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24580536912751677
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3445729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11760305851063829
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "RecurrentGemmaForCausalLM",
- "params_billions": 2.683
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-9b-it/c7095b76-2d50-467b-a8d9-d7a277f1f14c.json b/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-9b-it/c7095b76-2d50-467b-a8d9-d7a277f1f14c.json
deleted file mode 100644
index 7bb86b9ece05231a8100bbee02e824908982e0d8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-9b-it/c7095b76-2d50-467b-a8d9-d7a277f1f14c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_recurrentgemma-9b-it/1762652580.180049",
- "retrieved_timestamp": "1762652580.18005",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/recurrentgemma-9b-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/recurrentgemma-9b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5010383560065071
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4367189649027647
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43790625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2843251329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "RecurrentGemmaForCausalLM",
- "params_billions": 9.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-9b/1ff3ab95-3007-4cbf-a146-5e8e4ae65404.json b/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-9b/1ff3ab95-3007-4cbf-a146-5e8e4ae65404.json
deleted file mode 100644
index 4218dda52eaadbbfae66eb7c25beb6db98d2d08a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_recurrentgemma-9b/1ff3ab95-3007-4cbf-a146-5e8e4ae65404.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_recurrentgemma-9b/1762652580.17984",
- "retrieved_timestamp": "1762652580.179841",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/recurrentgemma-9b",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/recurrentgemma-9b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31159434744256354
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39562568669428394
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3802604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2604720744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "RecurrentGemmaForCausalLM",
- "params_billions": 9.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_switch-base-8/43e22ce0-cdd7-424f-8a01-f9fea8b2a010.json b/leaderboard_data/HFOpenLLMv2/google/google_switch-base-8/43e22ce0-cdd7-424f-8a01-f9fea8b2a010.json
deleted file mode 100644
index 2b4e27390ec85d9ac714f969ae8a3d0f8f9e825e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_switch-base-8/43e22ce0-cdd7-424f-8a01-f9fea8b2a010.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_switch-base-8/1762652580.180255",
- "retrieved_timestamp": "1762652580.180256",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/switch-base-8",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/switch-base-8"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15852050337548815
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28763132730669333
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35173958333333327
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10979055851063829
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "SwitchTransformersForConditionalGeneration",
- "params_billions": 0.62
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/google_umt5-base/659053b0-7694-41e7-916d-28406b3ed572.json b/leaderboard_data/HFOpenLLMv2/google/google_umt5-base/659053b0-7694-41e7-916d-28406b3ed572.json
deleted file mode 100644
index 0dffb525d0d75b9dd67805678287dcdafe5c2e80..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/google_umt5-base/659053b0-7694-41e7-916d-28406b3ed572.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/google_umt5-base/1762652580.180466",
- "retrieved_timestamp": "1762652580.180467",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "google/umt5-base",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "google/umt5-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.174632198123202
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27877262328945457
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.004531722054380665
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25419463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33821875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10779587765957446
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "UMT5ForConditionalGeneration",
- "params_billions": -1.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/grimjim_Gigantes-v1-gemma2-9b-it/57072a5e-1f64-4ae2-9e2c-caecc1dc05f4.json b/leaderboard_data/HFOpenLLMv2/google/grimjim_Gigantes-v1-gemma2-9b-it/57072a5e-1f64-4ae2-9e2c-caecc1dc05f4.json
deleted file mode 100644
index 46ab1077cd8d623cfaa03be1874b06293517d4b3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/grimjim_Gigantes-v1-gemma2-9b-it/57072a5e-1f64-4ae2-9e2c-caecc1dc05f4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/grimjim_Gigantes-v1-gemma2-9b-it/1762652580.1819131",
- "retrieved_timestamp": "1762652580.1819131",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "grimjim/Gigantes-v1-gemma2-9b-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "grimjim/Gigantes-v1-gemma2-9b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.692454908531585
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.597792552822268
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21450151057401812
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35318791946308725
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45547916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42253989361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/grimjim_Gigantes-v2-gemma2-9b-it/47486923-2194-4b8e-930c-ca14bd5f8a26.json b/leaderboard_data/HFOpenLLMv2/google/grimjim_Gigantes-v2-gemma2-9b-it/47486923-2194-4b8e-930c-ca14bd5f8a26.json
deleted file mode 100644
index 81c4205cb13d6a522a9b016402172614e425b285..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/grimjim_Gigantes-v2-gemma2-9b-it/47486923-2194-4b8e-930c-ca14bd5f8a26.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/grimjim_Gigantes-v2-gemma2-9b-it/1762652580.182155",
- "retrieved_timestamp": "1762652580.182156",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "grimjim/Gigantes-v2-gemma2-9b-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "grimjim/Gigantes-v2-gemma2-9b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7350696152874374
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5986559388303995
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20166163141993956
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35151006711409394
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45947916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4259474734042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/grimjim_Gigantes-v3-gemma2-9b-it/bb063d7a-65fa-416b-88e9-7bacdef1da3e.json b/leaderboard_data/HFOpenLLMv2/google/grimjim_Gigantes-v3-gemma2-9b-it/bb063d7a-65fa-416b-88e9-7bacdef1da3e.json
deleted file mode 100644
index c4a70ca160cc43fc8de5d139e42eb970a51514ea..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/grimjim_Gigantes-v3-gemma2-9b-it/bb063d7a-65fa-416b-88e9-7bacdef1da3e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/grimjim_Gigantes-v3-gemma2-9b-it/1762652580.182362",
- "retrieved_timestamp": "1762652580.1823628",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "grimjim/Gigantes-v3-gemma2-9b-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "grimjim/Gigantes-v3-gemma2-9b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.697625633319592
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5983513792324827
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20996978851963746
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3565436241610738
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4608125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4226230053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magnolia-v1-Gemma2-8k-9B/2cf17692-b105-41df-9783-6c7728ab778f.json b/leaderboard_data/HFOpenLLMv2/google/grimjim_Magnolia-v1-Gemma2-8k-9B/2cf17692-b105-41df-9783-6c7728ab778f.json
deleted file mode 100644
index a1751ce93320e6f60ee25614ca7aefe49be81e1f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magnolia-v1-Gemma2-8k-9B/2cf17692-b105-41df-9783-6c7728ab778f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v1-Gemma2-8k-9B/1762652580.1841059",
- "retrieved_timestamp": "1762652580.1841059",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "grimjim/Magnolia-v1-Gemma2-8k-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "grimjim/Magnolia-v1-Gemma2-8k-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35308536904302806
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5589031767575711
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16842900302114805
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33640939597315433
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46446875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4242021276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magnolia-v2-Gemma2-8k-9B/4d0574f4-4d91-4395-afff-133216eee509.json b/leaderboard_data/HFOpenLLMv2/google/grimjim_Magnolia-v2-Gemma2-8k-9B/4d0574f4-4d91-4395-afff-133216eee509.json
deleted file mode 100644
index 86d80bc0f4d9e8e9f4416b136584010bd570bb84..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magnolia-v2-Gemma2-8k-9B/4d0574f4-4d91-4395-afff-133216eee509.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v2-Gemma2-8k-9B/1762652580.184566",
- "retrieved_timestamp": "1762652580.184567",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "grimjim/Magnolia-v2-Gemma2-8k-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "grimjim/Magnolia-v2-Gemma2-8k-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7384417789243651
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6015773428405322
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2280966767371601
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3573825503355705
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44884375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4331781914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magnolia-v3-Gemma2-8k-9B/8fff2cec-a733-4505-bce9-8b605044181a.json b/leaderboard_data/HFOpenLLMv2/google/grimjim_Magnolia-v3-Gemma2-8k-9B/8fff2cec-a733-4505-bce9-8b605044181a.json
deleted file mode 100644
index 221bfc0806afe9ea32f6d3eb82bc32099cd0ea67..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magnolia-v3-Gemma2-8k-9B/8fff2cec-a733-4505-bce9-8b605044181a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v3-Gemma2-8k-9B/1762652580.1850398",
- "retrieved_timestamp": "1762652580.185041",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "grimjim/Magnolia-v3-Gemma2-8k-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "grimjim/Magnolia-v3-Gemma2-8k-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7378422585406721
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6015406636327695
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23187311178247735
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3565436241610738
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4488125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43367686170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magot-v1-Gemma2-8k-9B/9e63ff64-f862-40ad-b594-31063ec0d31e.json b/leaderboard_data/HFOpenLLMv2/google/grimjim_Magot-v1-Gemma2-8k-9B/9e63ff64-f862-40ad-b594-31063ec0d31e.json
deleted file mode 100644
index f4421a1c9147a815177bb35ae0d3e5c8930aca37..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magot-v1-Gemma2-8k-9B/9e63ff64-f862-40ad-b594-31063ec0d31e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/grimjim_Magot-v1-Gemma2-8k-9B/1762652580.185666",
- "retrieved_timestamp": "1762652580.185667",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "grimjim/Magot-v1-Gemma2-8k-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "grimjim/Magot-v1-Gemma2-8k-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29967818720993633
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6019447732218105
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09894259818731117
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3464765100671141
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44884375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43367686170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magot-v2-Gemma2-8k-9B/2d250aa8-f3c5-4f9f-9e5c-dde8f720db53.json b/leaderboard_data/HFOpenLLMv2/google/grimjim_Magot-v2-Gemma2-8k-9B/2d250aa8-f3c5-4f9f-9e5c-dde8f720db53.json
deleted file mode 100644
index c4f29f48b80d3bbdd4f327b0047ff47eddb93ee0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/grimjim_Magot-v2-Gemma2-8k-9B/2d250aa8-f3c5-4f9f-9e5c-dde8f720db53.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/grimjim_Magot-v2-Gemma2-8k-9B/1762652580.185882",
- "retrieved_timestamp": "1762652580.1858828",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "grimjim/Magot-v2-Gemma2-8k-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "grimjim/Magot-v2-Gemma2-8k-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7347449212533854
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5896713649821103
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20166163141993956
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3540268456375839
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4343958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4222905585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2Crono-27B/501e2a2c-e32c-455e-8e5f-f8bde053fddc.json b/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2Crono-27B/501e2a2c-e32c-455e-8e5f-f8bde053fddc.json
deleted file mode 100644
index beb48a038c142caded91df9fe94d4c603f0d399f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2Crono-27B/501e2a2c-e32c-455e-8e5f-f8bde053fddc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2Crono-27B/1762652580.193866",
- "retrieved_timestamp": "1762652580.193866",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/Gemma2Crono-27B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "hotmailuser/Gemma2Crono-27B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7086164709637096
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6505341690680219
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24244712990936557
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37080536912751677
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45668749999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4632646276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2SimPO-27B/433a8abf-8ff7-40bb-a4d0-654efdb6bf86.json b/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2SimPO-27B/433a8abf-8ff7-40bb-a4d0-654efdb6bf86.json
deleted file mode 100644
index 9d2f8db9249aae931546e52df89a2d2b7575c978..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2SimPO-27B/433a8abf-8ff7-40bb-a4d0-654efdb6bf86.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2SimPO-27B/1762652580.194106",
- "retrieved_timestamp": "1762652580.1941068",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/Gemma2SimPO-27B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "hotmailuser/Gemma2SimPO-27B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7222303488078299
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6413158976157102
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28172205438066467
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35822147651006714
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44465625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46417885638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2atlas-27B/c9020f27-9175-4f12-a108-6cbff1c0cb22.json b/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2atlas-27B/c9020f27-9175-4f12-a108-6cbff1c0cb22.json
deleted file mode 100644
index effa3723a98b85e5c47f42829abab8acda2d044d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2atlas-27B/c9020f27-9175-4f12-a108-6cbff1c0cb22.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2atlas-27B/1762652580.1943119",
- "retrieved_timestamp": "1762652580.194313",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/Gemma2atlas-27B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "hotmailuser/Gemma2atlas-27B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7213560020744957
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6544960921220462
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21450151057401812
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35570469798657717
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44453125000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4749833776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2magnum-27b/0ad192a1-b33f-4362-a21d-ccc590986c5c.json b/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2magnum-27b/0ad192a1-b33f-4362-a21d-ccc590986c5c.json
deleted file mode 100644
index ec055e1f18620caa755773c68573ce461e870021..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/hotmailuser_Gemma2magnum-27b/0ad192a1-b33f-4362-a21d-ccc590986c5c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_Gemma2magnum-27b/1762652580.1945128",
- "retrieved_timestamp": "1762652580.194514",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/Gemma2magnum-27b",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "hotmailuser/Gemma2magnum-27b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5050599077115387
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6199590493843724
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22054380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3850671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47234375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45960771276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/ifable_gemma-2-Ifable-9B/42b3b64b-0e15-4a49-b542-da27ab7e2143.json b/leaderboard_data/HFOpenLLMv2/google/ifable_gemma-2-Ifable-9B/42b3b64b-0e15-4a49-b542-da27ab7e2143.json
deleted file mode 100644
index 2d47c7cc2f25ab02060dd5dfed016843aaed28d4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/ifable_gemma-2-Ifable-9B/42b3b64b-0e15-4a49-b542-da27ab7e2143.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ifable_gemma-2-Ifable-9B/1762652580.225604",
- "retrieved_timestamp": "1762652580.225605",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ifable/gemma-2-Ifable-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "ifable/gemma-2-Ifable-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2984292787581395
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5866115556693244
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13972809667673716
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3414429530201342
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40525000000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4226230053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/jebish7_gemma-2-2b-it/86206a02-3ab9-4a86-a00c-2900e8cd2e18.json b/leaderboard_data/HFOpenLLMv2/google/jebish7_gemma-2-2b-it/86206a02-3ab9-4a86-a00c-2900e8cd2e18.json
deleted file mode 100644
index 90a53be85ddaf1beff8680910444af629cf7cd98..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/jebish7_gemma-2-2b-it/86206a02-3ab9-4a86-a00c-2900e8cd2e18.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jebish7_gemma-2-2b-it/1762652580.2824588",
- "retrieved_timestamp": "1762652580.2824588",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jebish7/gemma-2-2b-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "jebish7/gemma-2-2b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12717035244263
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43951564907099594
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.033987915407854986
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42444791666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27152593085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/jebish7_gemma-2-9b-it/80a35d79-893b-439f-b100-a538a3c86974.json b/leaderboard_data/HFOpenLLMv2/google/jebish7_gemma-2-9b-it/80a35d79-893b-439f-b100-a538a3c86974.json
deleted file mode 100644
index 8a9b186c590b94a0e016273a40b66988cf9c7880..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/jebish7_gemma-2-9b-it/80a35d79-893b-439f-b100-a538a3c86974.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jebish7_gemma-2-9b-it/1762652580.282719",
- "retrieved_timestamp": "1762652580.28272",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jebish7/gemma-2-9b-it",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "jebish7/gemma-2-9b-it"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1557467519514887
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5949210568047724
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08459214501510574
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34731543624161076
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4554479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.414311835106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-9B/9ba72d50-4321-4383-8be9-286a56607624.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-9B/9ba72d50-4321-4383-8be9-286a56607624.json
deleted file mode 100644
index bd5c56febbbb82a653ef9907c2ae6cb8da834a62..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-9B/9ba72d50-4321-4383-8be9-286a56607624.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-9B/1762652580.31483",
- "retrieved_timestamp": "1762652580.314831",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lemon07r/Gemma-2-Ataraxy-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "lemon07r/Gemma-2-Ataraxy-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3008772279773224
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5931298417725773
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08534743202416918
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3347315436241611
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4424270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4226230053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-Advanced-9B/7806d1aa-b9e2-45bc-b89d-76e6c48dd3a0.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-Advanced-9B/7806d1aa-b9e2-45bc-b89d-76e6c48dd3a0.json
deleted file mode 100644
index b77ef0acc3e52b24d48b298eca7af62428084514..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-Advanced-9B/7806d1aa-b9e2-45bc-b89d-76e6c48dd3a0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-Advanced-9B/1762652580.315091",
- "retrieved_timestamp": "1762652580.315092",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lemon07r/Gemma-2-Ataraxy-Advanced-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "lemon07r/Gemma-2-Ataraxy-Advanced-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5515964308036011
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5889067263184956
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19788519637462235
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33557046979865773
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3760729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4243683510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-Remix-9B/29dfbb00-8760-46d8-bef8-d036870fb0c0.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-Remix-9B/29dfbb00-8760-46d8-bef8-d036870fb0c0.json
deleted file mode 100644
index 0e39f1369e6d2eee858a9746c99aa62313eb2b90..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-Remix-9B/29dfbb00-8760-46d8-bef8-d036870fb0c0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-Remix-9B/1762652580.31531",
- "retrieved_timestamp": "1762652580.3153112",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lemon07r/Gemma-2-Ataraxy-Remix-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "lemon07r/Gemma-2-Ataraxy-Remix-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7083416446140685
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5892021015046846
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20166163141993956
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3389261744966443
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4371875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42386968085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v2-9B/ca1b9625-0112-4ebf-b1c3-d2dd217d50b2.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v2-9B/ca1b9625-0112-4ebf-b1c3-d2dd217d50b2.json
deleted file mode 100644
index 07a4545a4bbcd687ab559bedcc24c3e1ef101fb3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v2-9B/ca1b9625-0112-4ebf-b1c3-d2dd217d50b2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v2-9B/1762652580.315539",
- "retrieved_timestamp": "1762652580.31554",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lemon07r/Gemma-2-Ataraxy-v2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "lemon07r/Gemma-2-Ataraxy-v2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21362429464930827
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5765835815625312
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08459214501510574
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3422818791946309
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34838541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.422124335106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v2a-9B/4fa1e172-f570-4a96-b53a-8ecf31854191.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v2a-9B/4fa1e172-f570-4a96-b53a-8ecf31854191.json
deleted file mode 100644
index cd1dd636aea0d20959029909b7dedcae0fdd93ee..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v2a-9B/4fa1e172-f570-4a96-b53a-8ecf31854191.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v2a-9B/1762652580.315754",
- "retrieved_timestamp": "1762652580.315755",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lemon07r/Gemma-2-Ataraxy-v2a-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "lemon07r/Gemma-2-Ataraxy-v2a-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15946909755005606
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.518248966271832
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06117824773413897
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33976510067114096
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31647916666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35147938829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v2f-9B/fd59fb1c-3681-44d2-9172-b10891ae9c55.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v2f-9B/fd59fb1c-3681-44d2-9172-b10891ae9c55.json
deleted file mode 100644
index 1323909e036d3459a29e3f9df907af0bd23f2840..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v2f-9B/fd59fb1c-3681-44d2-9172-b10891ae9c55.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v2f-9B/1762652580.315967",
- "retrieved_timestamp": "1762652580.315968",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lemon07r/Gemma-2-Ataraxy-v2f-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "lemon07r/Gemma-2-Ataraxy-v2f-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37911408396388246
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5192845467961766
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1163141993957704
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3389261744966443
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3231458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3503158244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3-Advanced-9B/778a10b0-c537-4592-9dbb-2b0de07ced4c.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3-Advanced-9B/778a10b0-c537-4592-9dbb-2b0de07ced4c.json
deleted file mode 100644
index e367ecf9dd7556eb20d1c8b960911c939ddeb2cb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3-Advanced-9B/778a10b0-c537-4592-9dbb-2b0de07ced4c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3-Advanced-9B/1762652580.316169",
- "retrieved_timestamp": "1762652580.316169",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "lemon07r/Gemma-2-Ataraxy-v3-Advanced-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6601816513517467
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5935146853737787
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18731117824773413
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33640939597315433
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44496874999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41963098404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3b-9B/d048e6ad-cc57-4ebe-8376-262564e86f0c.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3b-9B/d048e6ad-cc57-4ebe-8376-262564e86f0c.json
deleted file mode 100644
index 643f9026725a558be98635105c25e6dddc21f4a8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3b-9B/d048e6ad-cc57-4ebe-8376-262564e86f0c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3b-9B/1762652580.3163798",
- "retrieved_timestamp": "1762652580.316381",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lemon07r/Gemma-2-Ataraxy-v3b-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "lemon07r/Gemma-2-Ataraxy-v3b-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6809144181881852
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5907698162898164
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21525679758308158
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33305369127516776
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44887499999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4204621010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3i-9B/53602c70-73d9-461b-b27a-24c6a1a538e5.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3i-9B/53602c70-73d9-461b-b27a-24c6a1a538e5.json
deleted file mode 100644
index 722bad59f83d9880d8a7634f41345b5d8778e6df..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3i-9B/53602c70-73d9-461b-b27a-24c6a1a538e5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3i-9B/1762652580.3165948",
- "retrieved_timestamp": "1762652580.316596",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lemon07r/Gemma-2-Ataraxy-v3i-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "lemon07r/Gemma-2-Ataraxy-v3i-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4203047912871182
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5625750779805955
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15332326283987915
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31806249999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41663896276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3j-9B/d435bd27-1c26-429d-8ac5-8fd8c591a9aa.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3j-9B/d435bd27-1c26-429d-8ac5-8fd8c591a9aa.json
deleted file mode 100644
index 137075982eb90127bc6bcdb788b61599913ffd4c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v3j-9B/d435bd27-1c26-429d-8ac5-8fd8c591a9aa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v3j-9B/1762652580.3168168",
- "retrieved_timestamp": "1762652580.316818",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lemon07r/Gemma-2-Ataraxy-v3j-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "lemon07r/Gemma-2-Ataraxy-v3j-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4169326276501904
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5632286961183511
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1691842900302115
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31803125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41339760638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4-Advanced-9B/c0e95e3f-37a4-4b2f-a37b-37854546c241.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4-Advanced-9B/c0e95e3f-37a4-4b2f-a37b-37854546c241.json
deleted file mode 100644
index 780ec306b06d01408ed368c6f95e1abdfa090081..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4-Advanced-9B/c0e95e3f-37a4-4b2f-a37b-37854546c241.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4-Advanced-9B/1762652580.317157",
- "retrieved_timestamp": "1762652580.3171608",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "lemon07r/Gemma-2-Ataraxy-v4-Advanced-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7015474496558022
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6023627309683861
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21525679758308158
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3389261744966443
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4580520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4366688829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4a-Advanced-9B/b84aedba-7b87-445d-87c2-b029cb0038c3.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4a-Advanced-9B/b84aedba-7b87-445d-87c2-b029cb0038c3.json
deleted file mode 100644
index 848eef8ccc2617f82aaab208b90989e18ca24573..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4a-Advanced-9B/b84aedba-7b87-445d-87c2-b029cb0038c3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4a-Advanced-9B/1762652580.317515",
- "retrieved_timestamp": "1762652580.317516",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "lemon07r/Gemma-2-Ataraxy-v4a-Advanced-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7135123694020753
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.598838715496553
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21148036253776434
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34395973154362414
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44890625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4309341755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4b-9B/41f04f45-2f1d-42fd-87de-cc5e484cada2.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4b-9B/41f04f45-2f1d-42fd-87de-cc5e484cada2.json
deleted file mode 100644
index a6d98fe5049cb8df7856388b9a6b72fd1f4ce2b9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4b-9B/41f04f45-2f1d-42fd-87de-cc5e484cada2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4b-9B/1762652580.317803",
- "retrieved_timestamp": "1762652580.317804",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lemon07r/Gemma-2-Ataraxy-v4b-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "lemon07r/Gemma-2-Ataraxy-v4b-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6878338364428604
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6039158192304305
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23338368580060423
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34060402684563756
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45547916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4356715425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4c-9B/9499ec24-5be2-478c-b13e-3102d1555668.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4c-9B/9499ec24-5be2-478c-b13e-3102d1555668.json
deleted file mode 100644
index 4bc386bd597e35dcf53b73e602f5e4a54a37173f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4c-9B/9499ec24-5be2-478c-b13e-3102d1555668.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4c-9B/1762652580.318075",
- "retrieved_timestamp": "1762652580.318076",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lemon07r/Gemma-2-Ataraxy-v4c-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "lemon07r/Gemma-2-Ataraxy-v4c-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6945282960323054
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6084319292299174
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22658610271903323
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3338926174496644
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45278124999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43949468085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4d-9B/7e6685d8-af21-4810-a9cc-edb296f4b937.json b/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4d-9B/7e6685d8-af21-4810-a9cc-edb296f4b937.json
deleted file mode 100644
index ad5a8c68d73dc82cbc516b1bc894a92092508de0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/lemon07r_Gemma-2-Ataraxy-v4d-9B/7e6685d8-af21-4810-a9cc-edb296f4b937.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lemon07r_Gemma-2-Ataraxy-v4d-9B/1762652580.318495",
- "retrieved_timestamp": "1762652580.318496",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lemon07r/Gemma-2-Ataraxy-v4d-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "lemon07r/Gemma-2-Ataraxy-v4d-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7250029920610646
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6054158192304304
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23338368580060423
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34731543624161076
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4541458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4345910904255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/lkoenig_BBAI_200_Gemma/b71c5ede-010d-4ce4-9f12-552388e2d9eb.json b/leaderboard_data/HFOpenLLMv2/google/lkoenig_BBAI_200_Gemma/b71c5ede-010d-4ce4-9f12-552388e2d9eb.json
deleted file mode 100644
index b7d2d0d7db5b2a5a50ff1a5872f940c4bc4fa7f7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/lkoenig_BBAI_200_Gemma/b71c5ede-010d-4ce4-9f12-552388e2d9eb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/lkoenig_BBAI_200_Gemma/1762652580.32272",
- "retrieved_timestamp": "1762652580.32272",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "lkoenig/BBAI_200_Gemma",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "lkoenig/BBAI_200_Gemma"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07051733843978422
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3449044607726533
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26677852348993286
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36311458333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16788563829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 19.3
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/monsterapi_gemma-2-2b-LoRA-MonsterInstruct/f5395aa2-334b-410c-a2ee-4d7381f1c9bc.json b/leaderboard_data/HFOpenLLMv2/google/monsterapi_gemma-2-2b-LoRA-MonsterInstruct/f5395aa2-334b-410c-a2ee-4d7381f1c9bc.json
deleted file mode 100644
index fc07be68fe1d818626ab0b126d9590e23737ccf3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/monsterapi_gemma-2-2b-LoRA-MonsterInstruct/f5395aa2-334b-410c-a2ee-4d7381f1c9bc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/monsterapi_gemma-2-2b-LoRA-MonsterInstruct/1762652580.372597",
- "retrieved_timestamp": "1762652580.372598",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "monsterapi/gemma-2-2b-LoRA-MonsterInstruct",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "monsterapi/gemma-2-2b-LoRA-MonsterInstruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3902545246612322
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36496861927498697
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05060422960725076
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3643854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19872007978723405
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/nbeerbower_Gemma2-Gutenberg-Doppel-9B/b6514bef-f106-45e0-8571-da3507b0e95b.json b/leaderboard_data/HFOpenLLMv2/google/nbeerbower_Gemma2-Gutenberg-Doppel-9B/b6514bef-f106-45e0-8571-da3507b0e95b.json
deleted file mode 100644
index 86a654c2aa0ddb8ae5cfc508684c4d30cbfe975c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/nbeerbower_Gemma2-Gutenberg-Doppel-9B/b6514bef-f106-45e0-8571-da3507b0e95b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/nbeerbower_Gemma2-Gutenberg-Doppel-9B/1762652580.378716",
- "retrieved_timestamp": "1762652580.378717",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "nbeerbower/Gemma2-Gutenberg-Doppel-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "nbeerbower/Gemma2-Gutenberg-Doppel-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7171094917042337
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5870114193661848
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19788519637462235
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3296979865771812
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46078125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41273271276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/nbeerbower_gemma2-gutenberg-27B/b0a9fb09-2637-4b4c-9d78-7dc8d9c6aad2.json b/leaderboard_data/HFOpenLLMv2/google/nbeerbower_gemma2-gutenberg-27B/b0a9fb09-2637-4b4c-9d78-7dc8d9c6aad2.json
deleted file mode 100644
index 54c32c1194eac9632ce41a6ea4badfa2e54c14ee..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/nbeerbower_gemma2-gutenberg-27B/b0a9fb09-2637-4b4c-9d78-7dc8d9c6aad2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/nbeerbower_gemma2-gutenberg-27B/1762652580.384448",
- "retrieved_timestamp": "1762652580.3844512",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "nbeerbower/gemma2-gutenberg-27B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "nbeerbower/gemma2-gutenberg-27B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29470804133033685
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37965683503451614
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0188821752265861
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2726510067114094
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3727291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19822140957446807
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 27.227
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/nbeerbower_gemma2-gutenberg-9B/14dc56ff-7f3b-430e-a4b3-6e4c9961fea3.json b/leaderboard_data/HFOpenLLMv2/google/nbeerbower_gemma2-gutenberg-9B/14dc56ff-7f3b-430e-a4b3-6e4c9961fea3.json
deleted file mode 100644
index ff31bbd2402f3b3def4d04015b53bd505190b328..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/nbeerbower_gemma2-gutenberg-9B/14dc56ff-7f3b-430e-a4b3-6e4c9961fea3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/nbeerbower_gemma2-gutenberg-9B/1762652580.384712",
- "retrieved_timestamp": "1762652580.384713",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "nbeerbower/gemma2-gutenberg-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "nbeerbower/gemma2-gutenberg-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2795948084416016
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5950904001490335
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08081570996978851
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45951041666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4192154255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/nhyha_N3N_gemma-2-9b-it_20241029_1532/cb85dee2-acee-48f8-85aa-1d5664179fd5.json b/leaderboard_data/HFOpenLLMv2/google/nhyha_N3N_gemma-2-9b-it_20241029_1532/cb85dee2-acee-48f8-85aa-1d5664179fd5.json
deleted file mode 100644
index de663d3ec4d050190d770145cac5f661b12a4a7f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/nhyha_N3N_gemma-2-9b-it_20241029_1532/cb85dee2-acee-48f8-85aa-1d5664179fd5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/nhyha_N3N_gemma-2-9b-it_20241029_1532/1762652580.4059799",
- "retrieved_timestamp": "1762652580.4059808",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "nhyha/N3N_gemma-2-9b-it_20241029_1532",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "nhyha/N3N_gemma-2-9b-it_20241029_1532"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6751940407008958
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5863124381827675
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2122356495468278
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34060402684563756
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4593541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4122340425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/nhyha_N3N_gemma-2-9b-it_20241110_2026/4c450b48-8477-45cb-9cfa-814c21dd39d7.json b/leaderboard_data/HFOpenLLMv2/google/nhyha_N3N_gemma-2-9b-it_20241110_2026/4c450b48-8477-45cb-9cfa-814c21dd39d7.json
deleted file mode 100644
index 9ec2d6cd1b1ed0edeef3b58ab16e01a55b97c121..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/nhyha_N3N_gemma-2-9b-it_20241110_2026/4c450b48-8477-45cb-9cfa-814c21dd39d7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/nhyha_N3N_gemma-2-9b-it_20241110_2026/1762652580.406234",
- "retrieved_timestamp": "1762652580.406235",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "nhyha/N3N_gemma-2-9b-it_20241110_2026",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "nhyha/N3N_gemma-2-9b-it_20241110_2026"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6282829558903709
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5867149609980419
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1608761329305136
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33640939597315433
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40730208333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40201130319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/nidum_Nidum-Limitless-Gemma-2B/49e352c1-2319-4bc5-aa3f-1697739a05b8.json b/leaderboard_data/HFOpenLLMv2/google/nidum_Nidum-Limitless-Gemma-2B/49e352c1-2319-4bc5-aa3f-1697739a05b8.json
deleted file mode 100644
index 4268cf870bd3c518f358466ffcf2d73e04a798b9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/nidum_Nidum-Limitless-Gemma-2B/49e352c1-2319-4bc5-aa3f-1697739a05b8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/nidum_Nidum-Limitless-Gemma-2B/1762652580.406632",
- "retrieved_timestamp": "1762652580.406633",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "nidum/Nidum-Limitless-Gemma-2B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "nidum/Nidum-Limitless-Gemma-2B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24235140538216376
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3078801520076317
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.013595166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26426174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37403125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11735372340425532
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GemmaForCausalLM",
- "params_billions": 2.506
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/noname0202_gemma-2-2b-it-ties/42bed40b-ac71-42c8-b56b-47d1f930c736.json b/leaderboard_data/HFOpenLLMv2/google/noname0202_gemma-2-2b-it-ties/42bed40b-ac71-42c8-b56b-47d1f930c736.json
deleted file mode 100644
index 9cc58ee0ff5edbf46d2c3384e87a112c9982dd1c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/noname0202_gemma-2-2b-it-ties/42bed40b-ac71-42c8-b56b-47d1f930c736.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/noname0202_gemma-2-2b-it-ties/1762652580.4097438",
- "retrieved_timestamp": "1762652580.409745",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "noname0202/gemma-2-2b-it-ties",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "noname0202/gemma-2-2b-it-ties"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12657083205893696
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42057403060290816
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02416918429003021
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2701342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39288541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2560671542553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/princeton-nlp_gemma-2-9b-it-SimPO/4285b38c-aba8-444b-9b0b-b265c7b1fef1.json b/leaderboard_data/HFOpenLLMv2/google/princeton-nlp_gemma-2-9b-it-SimPO/4285b38c-aba8-444b-9b0b-b265c7b1fef1.json
deleted file mode 100644
index d17aed9d2754b6734e25400ffd2214e2cb2dd7fe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/princeton-nlp_gemma-2-9b-it-SimPO/4285b38c-aba8-444b-9b0b-b265c7b1fef1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/princeton-nlp_gemma-2-9b-it-SimPO/1762652580.454763",
- "retrieved_timestamp": "1762652580.4547682",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "princeton-nlp/gemma-2-9b-it-SimPO",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "princeton-nlp/gemma-2-9b-it-SimPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3206857803960159
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5839179923162123
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07099697885196375
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33557046979865773
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41232291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39752327127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/qq8933_OpenLongCoT-Base-Gemma2-2B/c945b9b5-7b46-4300-adcc-2d6c94df0ac1.json b/leaderboard_data/HFOpenLLMv2/google/qq8933_OpenLongCoT-Base-Gemma2-2B/c945b9b5-7b46-4300-adcc-2d6c94df0ac1.json
deleted file mode 100644
index 1a960d5f0572403e1e4464b3f4bcc60de81b7679..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/qq8933_OpenLongCoT-Base-Gemma2-2B/c945b9b5-7b46-4300-adcc-2d6c94df0ac1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/qq8933_OpenLongCoT-Base-Gemma2-2B/1762652580.488883",
- "retrieved_timestamp": "1762652580.488883",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "qq8933/OpenLongCoT-Base-Gemma2-2B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "qq8933/OpenLongCoT-Base-Gemma2-2B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1965141380426158
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3106362870893106
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.023413897280966767
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32225
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1315658244680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 3.204
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/054a662a-e425-448c-9556-6998833e51ff.json b/leaderboard_data/HFOpenLLMv2/google/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/054a662a-e425-448c-9556-6998833e51ff.json
deleted file mode 100644
index 13538a71529710db6ca718c9ef103921cddb9f53..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/054a662a-e425-448c-9556-6998833e51ff.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/1762652580.491333",
- "retrieved_timestamp": "1762652580.491333",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7648949232480928
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.597438766061506
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.017371601208459216
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4244791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4207114361702128
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/0a685d8f-38c7-4521-9613-7b36ad1cac73.json b/leaderboard_data/HFOpenLLMv2/google/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/0a685d8f-38c7-4521-9613-7b36ad1cac73.json
deleted file mode 100644
index b6c5609aa326d3e4f20fbc4f74166691ee7818be..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/0a685d8f-38c7-4521-9613-7b36ad1cac73.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/recoilme_Gemma-2-Ataraxy-Gemmasutra-9B-slerp/1762652580.491603",
- "retrieved_timestamp": "1762652580.491603",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "recoilme/Gemma-2-Ataraxy-Gemmasutra-9B-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28536505361330156
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5983926033872208
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10045317220543806
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3296979865771812
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46065625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4162234042553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.1/d31a41b0-6500-4e1b-8435-b9d3e9725c02.json b/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.1/d31a41b0-6500-4e1b-8435-b9d3e9725c02.json
deleted file mode 100644
index 70a862a7728aab34f128f730f1b7903f001528ab..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.1/d31a41b0-6500-4e1b-8435-b9d3e9725c02.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.1/1762652580.491797",
- "retrieved_timestamp": "1762652580.491798",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "recoilme/recoilme-gemma-2-9B-v0.1",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "recoilme/recoilme-gemma-2-9B-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.751506004069203
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5995309756292291
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2039274924471299
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3389261744966443
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41914583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4158909574468085
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.2/5826c93f-3642-44cf-b385-4a5ab5103086.json b/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.2/5826c93f-3642-44cf-b385-4a5ab5103086.json
deleted file mode 100644
index 3e67a6fa1eab3dcee5fb9d571700d2bb112692b3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.2/5826c93f-3642-44cf-b385-4a5ab5103086.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.2/1762652580.4922318",
- "retrieved_timestamp": "1762652580.492233",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "recoilme/recoilme-gemma-2-9B-v0.2",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "recoilme/recoilme-gemma-2-9B-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2746989100032359
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6030832642626502
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46859375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4122340425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.2/6a15378c-36cc-4f5e-b184-5a19a6fbb192.json b/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.2/6a15378c-36cc-4f5e-b184-5a19a6fbb192.json
deleted file mode 100644
index dc617e475e34da317650569cb0f474e34b09f6ff..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.2/6a15378c-36cc-4f5e-b184-5a19a6fbb192.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.2/1762652580.492019",
- "retrieved_timestamp": "1762652580.49202",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "recoilme/recoilme-gemma-2-9B-v0.2",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "recoilme/recoilme-gemma-2-9B-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7591745457608035
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6025964285724085
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.052870090634441085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3288590604026846
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.409875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41630651595744683
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.3/47cfe707-ba31-4c9b-aa15-9ab8b566e206.json b/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.3/47cfe707-ba31-4c9b-aa15-9ab8b566e206.json
deleted file mode 100644
index 19c46567c7647d43e8f412b30653981644d18f2c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.3/47cfe707-ba31-4c9b-aa15-9ab8b566e206.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.3/1762652580.492416",
- "retrieved_timestamp": "1762652580.492416",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "recoilme/recoilme-gemma-2-9B-v0.3",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "recoilme/recoilme-gemma-2-9B-v0.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.743937197746424
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5992527878628748
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08761329305135952
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3238255033557047
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4203854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4072473404255319
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.3/8d3bd687-89f5-4d62-af46-93646aea4341.json b/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.3/8d3bd687-89f5-4d62-af46-93646aea4341.json
deleted file mode 100644
index 9dee920187aa5a737ff56cdc72c0eccef0815183..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.3/8d3bd687-89f5-4d62-af46-93646aea4341.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.3/1762652580.492666",
- "retrieved_timestamp": "1762652580.492667",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "recoilme/recoilme-gemma-2-9B-v0.3",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "recoilme/recoilme-gemma-2-9B-v0.3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.57607592299543
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6019827101058847
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18882175226586104
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.337248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46322916666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4039228723404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.4/28eef1b7-a83e-49c9-8f11-ef9e4ae7e1ce.json b/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.4/28eef1b7-a83e-49c9-8f11-ef9e4ae7e1ce.json
deleted file mode 100644
index 26aa56d1c7fb267a171921ab3bffbfc550013c7a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.4/28eef1b7-a83e-49c9-8f11-ef9e4ae7e1ce.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.4/1762652580.4928808",
- "retrieved_timestamp": "1762652580.492882",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "recoilme/recoilme-gemma-2-9B-v0.4",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "recoilme/recoilme-gemma-2-9B-v0.4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2561891337207498
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5967285833554881
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08459214501510574
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34060402684563756
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4726875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4405751329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.5/8fe5a1e8-1491-4e64-8aed-32e73f2dae6e.json b/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.5/8fe5a1e8-1491-4e64-8aed-32e73f2dae6e.json
deleted file mode 100644
index 00374765572366b1d089bfc9194e29b4bf3440e1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/recoilme_recoilme-gemma-2-9B-v0.5/8fe5a1e8-1491-4e64-8aed-32e73f2dae6e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/recoilme_recoilme-gemma-2-9B-v0.5/1762652580.4931269",
- "retrieved_timestamp": "1762652580.493134",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "recoilme/recoilme-gemma-2-9B-v0.5",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "recoilme/recoilme-gemma-2-9B-v0.5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7664186580495308
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5981472549925003
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21148036253776434
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33640939597315433
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4231770833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41996343085106386
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/sequelbox_gemma-2-9B-MOTH/4bdefb85-2413-43b7-8938-869ad0cff58f.json b/leaderboard_data/HFOpenLLMv2/google/sequelbox_gemma-2-9B-MOTH/4bdefb85-2413-43b7-8938-869ad0cff58f.json
deleted file mode 100644
index b7e8a93e4433ac86ab62c946eac76263b683f753..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/sequelbox_gemma-2-9B-MOTH/4bdefb85-2413-43b7-8938-869ad0cff58f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/sequelbox_gemma-2-9B-MOTH/1762652580.5126731",
- "retrieved_timestamp": "1762652580.512674",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "sequelbox/gemma-2-9B-MOTH",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "sequelbox/gemma-2-9B-MOTH"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20588150551647405
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30797000521562534
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.010574018126888218
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3409479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11402925531914894
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/wzhouad_gemma-2-9b-it-WPO-HB/70fe199f-6c81-4d99-a595-208b7abc321f.json b/leaderboard_data/HFOpenLLMv2/google/wzhouad_gemma-2-9b-it-WPO-HB/70fe199f-6c81-4d99-a595-208b7abc321f.json
deleted file mode 100644
index 74263acd90c9f652662d56c5d0ec9927008ff27a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/wzhouad_gemma-2-9b-it-WPO-HB/70fe199f-6c81-4d99-a595-208b7abc321f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/wzhouad_gemma-2-9b-it-WPO-HB/1762652580.596365",
- "retrieved_timestamp": "1762652580.5963662",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "wzhouad/gemma-2-9b-it-WPO-HB",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "wzhouad/gemma-2-9b-it-WPO-HB"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5437029304467702
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5628624376751974
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15332326283987915
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3498322147651007
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3674583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33602061170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/9c7a213f-e5f8-4cc2-9cbe-d61db2cf2bbe.json b/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/9c7a213f-e5f8-4cc2-9cbe-d61db2cf2bbe.json
deleted file mode 100644
index 2cd22138a8e9202be87e502bec7565ba908aa850..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/9c7a213f-e5f8-4cc2-9cbe-d61db2cf2bbe.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18-merge/1762652580.609323",
- "retrieved_timestamp": "1762652580.609324",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18-merge"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5218209905273563
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.414688942270627
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.054380664652567974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35139583333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24609375
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18/23800723-b5bd-4fc6-9d07-ca937c8680c6.json b/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18/23800723-b5bd-4fc6-9d07-ca937c8680c6.json
deleted file mode 100644
index 20e1bc4ed611432d30a06aac8493ccc6d8000e11..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18/23800723-b5bd-4fc6-9d07-ca937c8680c6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-ORPO-jpn-it-abliterated-18/1762652580.6090298",
- "retrieved_timestamp": "1762652580.609031",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "ymcki/gemma-2-2b-ORPO-jpn-it-abliterated-18"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4630945890237902
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4052902505118913
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04305135951661632
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28859060402684567
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3754270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23445811170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17-18-24/7321bd04-6f20-427a-8219-0ff2e299cb01.json b/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17-18-24/7321bd04-6f20-427a-8219-0ff2e299cb01.json
deleted file mode 100644
index 1d85b8c3b91b65562608327add5d7c85a09852f5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17-18-24/7321bd04-6f20-427a-8219-0ff2e299cb01.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17-18-24/1762652580.609858",
- "retrieved_timestamp": "1762652580.609859",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-18-24"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.505484337114412
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38123590457353557
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0256797583081571
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35015625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2282247340425532
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/3cc8621a-b38c-4735-af09-027989774289.json b/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/3cc8621a-b38c-4735-af09-027989774289.json
deleted file mode 100644
index aac637ccc83c5f24ed4026a17f576a4372cca891..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/3cc8621a-b38c-4735-af09-027989774289.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca/1762652580.6102881",
- "retrieved_timestamp": "1762652580.6102889",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO-alpaca"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30647349033896726
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40715971926711275
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0324773413897281
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26929530201342283
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39691666666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2249002659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO/44b47789-f529-4bae-9e87-196abc325efc.json b/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO/44b47789-f529-4bae-9e87-196abc325efc.json
deleted file mode 100644
index 195b074417c46e7dd07e5f7a6d565510147ac23c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO/44b47789-f529-4bae-9e87-196abc325efc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17-ORPO/1762652580.610075",
- "retrieved_timestamp": "1762652580.610076",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17-ORPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47478468242042227
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38979797271028965
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.061933534743202415
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27432885906040266
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37676041666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21908244680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17/5958a61d-bf39-4de4-bfe1-6a6db2f37f55.json b/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17/5958a61d-bf39-4de4-bfe1-6a6db2f37f55.json
deleted file mode 100644
index 7e09f976f96d8c9bec94453d22992b3a3687cbc2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-17/5958a61d-bf39-4de4-bfe1-6a6db2f37f55.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-17/1762652580.609628",
- "retrieved_timestamp": "1762652580.609628",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ymcki/gemma-2-2b-jpn-it-abliterated-17",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "ymcki/gemma-2-2b-jpn-it-abliterated-17"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5081572449988254
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40762664531580056
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03851963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37006249999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2455119680851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-18-ORPO/c91ab7d1-b36e-45ca-8f1e-ad9ef0c38100.json b/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-18-ORPO/c91ab7d1-b36e-45ca-8f1e-ad9ef0c38100.json
deleted file mode 100644
index 5f7750677b282dd6682132b8ba4bd6ad0e706631..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-18-ORPO/c91ab7d1-b36e-45ca-8f1e-ad9ef0c38100.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-18-ORPO/1762652580.610698",
- "retrieved_timestamp": "1762652580.610699",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "ymcki/gemma-2-2b-jpn-it-abliterated-18-ORPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47423502972113984
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40389353402379324
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04682779456193353
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3953333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21850066489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-18/78f235b0-fa98-48e2-bb03-9f7e9f986004.json b/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-18/78f235b0-fa98-48e2-bb03-9f7e9f986004.json
deleted file mode 100644
index 2314c9e0cc9929f9de5dd0317a7dab8a7aa69e0f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-18/78f235b0-fa98-48e2-bb03-9f7e9f986004.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-18/1762652580.610494",
- "retrieved_timestamp": "1762652580.610495",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ymcki/gemma-2-2b-jpn-it-abliterated-18",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "ymcki/gemma-2-2b-jpn-it-abliterated-18"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5175246124726836
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4132188791645781
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0445619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27348993288590606
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37415624999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25049867021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-24/4f0262d9-2a01-4127-bb40-1bbf437bbc07.json b/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-24/4f0262d9-2a01-4127-bb40-1bbf437bbc07.json
deleted file mode 100644
index 74e31d307bd6a03504d319c8714777bacd212776..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/ymcki_gemma-2-2b-jpn-it-abliterated-24/4f0262d9-2a01-4127-bb40-1bbf437bbc07.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ymcki_gemma-2-2b-jpn-it-abliterated-24/1762652580.610902",
- "retrieved_timestamp": "1762652580.610903",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ymcki/gemma-2-2b-jpn-it-abliterated-24",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "ymcki/gemma-2-2b-jpn-it-abliterated-24"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49786566310722213
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41096027770392857
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04380664652567976
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39148958333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2473404255319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 2.614
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zake7749_gemma-2-9b-it-chinese-kyara/827af354-0efb-4a44-b62a-c8562fd0065b.json b/leaderboard_data/HFOpenLLMv2/google/zake7749_gemma-2-9b-it-chinese-kyara/827af354-0efb-4a44-b62a-c8562fd0065b.json
deleted file mode 100644
index d820e36bfd9482a4022326b91ff523051a75d920..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zake7749_gemma-2-9b-it-chinese-kyara/827af354-0efb-4a44-b62a-c8562fd0065b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zake7749_gemma-2-9b-it-chinese-kyara/1762652580.612564",
- "retrieved_timestamp": "1762652580.612565",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zake7749/gemma-2-9b-it-chinese-kyara",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zake7749/gemma-2-9b-it-chinese-kyara"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17642965110351644
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5953692987878404
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10498489425981873
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4241979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41788563829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 9.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_Gemma-2-TM-9B/4d3c877e-3dea-44af-8133-d555355971f8.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_Gemma-2-TM-9B/4d3c877e-3dea-44af-8133-d555355971f8.json
deleted file mode 100644
index 382c5b725cfdb3ec0163eb26882f487e2aeab67c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_Gemma-2-TM-9B/4d3c877e-3dea-44af-8133-d555355971f8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_Gemma-2-TM-9B/1762652580.612811",
- "retrieved_timestamp": "1762652580.612811",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/Gemma-2-TM-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/Gemma-2-TM-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8044621604010691
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5986592993557701
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20241691842900303
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3464765100671141
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41523958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40882646276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen1-gemma-2-9B/119f453d-714d-4324-aac5-8448bab91771.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen1-gemma-2-9B/119f453d-714d-4324-aac5-8448bab91771.json
deleted file mode 100644
index b97807cd85096add27d841ff2d341a25cf0472d9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen1-gemma-2-9B/119f453d-714d-4324-aac5-8448bab91771.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen1-gemma-2-9B/1762652580.613055",
- "retrieved_timestamp": "1762652580.613056",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-Gen1-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-Gen1-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7886252920029965
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6099997385328262
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22205438066465258
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3464765100671141
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4216875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4380817819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen2-GI-gemma-2-9B/0cf7e394-67e2-4ca3-ab2e-00cd4165eaf8.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen2-GI-gemma-2-9B/0cf7e394-67e2-4ca3-ab2e-00cd4165eaf8.json
deleted file mode 100644
index e3a3d43605aca663f28682676b8793734dca11fc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen2-GI-gemma-2-9B/0cf7e394-67e2-4ca3-ab2e-00cd4165eaf8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen2-GI-gemma-2-9B/1762652580.613308",
- "retrieved_timestamp": "1762652580.613309",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-Gen2-GI-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-Gen2-GI-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7913979352562313
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6095558882654465
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22054380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35067114093959734
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42832291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43558843085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen2-gemma-2-9B/6f5cbf98-67b4-4651-acee-160fe2e36f59.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen2-gemma-2-9B/6f5cbf98-67b4-4651-acee-160fe2e36f59.json
deleted file mode 100644
index 00e616cc7b8ffb9c6ba0f5d56555c49417f08555..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen2-gemma-2-9B/6f5cbf98-67b4-4651-acee-160fe2e36f59.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen2-gemma-2-9B/1762652580.613527",
- "retrieved_timestamp": "1762652580.613528",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-Gen2-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-Gen2-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7907485471881275
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6100494662695
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2190332326283988
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3464765100671141
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4322916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4387466755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen3-gemma-2-9B/79319862-c5eb-40a1-9424-ecc3835c1c9e.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen3-gemma-2-9B/79319862-c5eb-40a1-9424-ecc3835c1c9e.json
deleted file mode 100644
index 7de311dfec37c38ae434c9799d5c13335068b6a7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen3-gemma-2-9B/79319862-c5eb-40a1-9424-ecc3835c1c9e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen3-gemma-2-9B/1762652580.613742",
- "retrieved_timestamp": "1762652580.613743",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-Gen3-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-Gen3-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8020142111818863
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6097112889343964
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.229607250755287
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.348993288590604
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4216875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43558843085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen4-gemma-2-9B/7442a4c1-e225-4cea-b107-2d975460e214.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen4-gemma-2-9B/7442a4c1-e225-4cea-b107-2d975460e214.json
deleted file mode 100644
index e8088598e56c027380156297898b62ed9a5134d1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen4-gemma-2-9B/7442a4c1-e225-4cea-b107-2d975460e214.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen4-gemma-2-9B/1762652580.613958",
- "retrieved_timestamp": "1762652580.6139588",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-Gen4-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-Gen4-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7883005979689446
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6109884725351095
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22356495468277945
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3548657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4228020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4387466755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen5-gemma-2-9B/4431b126-a8b8-4776-8dd5-448ec4fb0caf.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen5-gemma-2-9B/4431b126-a8b8-4776-8dd5-448ec4fb0caf.json
deleted file mode 100644
index 3f2ca2a0cf105aa28bec766e00c1906896f8f135..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen5-gemma-2-9B/4431b126-a8b8-4776-8dd5-448ec4fb0caf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen5-gemma-2-9B/1762652580.614163",
- "retrieved_timestamp": "1762652580.614163",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-Gen5-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-Gen5-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7923221496739761
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6132787046647334
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21525679758308158
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35151006711409394
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42016666666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4402426861702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen6-gemma-2-9B/2dc22f82-e2fb-4690-b8e6-8c77b9bc9c45.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen6-gemma-2-9B/2dc22f82-e2fb-4690-b8e6-8c77b9bc9c45.json
deleted file mode 100644
index a341a3170da30123fb296ff13b8d81ddbce2fb5d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen6-gemma-2-9B/2dc22f82-e2fb-4690-b8e6-8c77b9bc9c45.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen6-gemma-2-9B/1762652580.614364",
- "retrieved_timestamp": "1762652580.6143649",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-Gen6-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-Gen6-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1615668648075994
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5844669261858688
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0823262839879154
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33305369127516776
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40692708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4165558510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen6fix-gemma-2-9B/0c2ec793-573d-4fb5-abc3-4aef4a8e2e72.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen6fix-gemma-2-9B/0c2ec793-573d-4fb5-abc3-4aef4a8e2e72.json
deleted file mode 100644
index e31b363e11206a27e3471fa4e3c0f395edf403ae..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen6fix-gemma-2-9B/0c2ec793-573d-4fb5-abc3-4aef4a8e2e72.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen6fix-gemma-2-9B/1762652580.614617",
- "retrieved_timestamp": "1762652580.614618",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-Gen6fix-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-Gen6fix-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15759518078697854
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5917309697578781
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08157099697885196
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.337248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40841666666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4119847074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen7-gemma-2-9B/29e65163-3e59-4bfe-a950-60092cb3171f.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen7-gemma-2-9B/29e65163-3e59-4bfe-a950-60092cb3171f.json
deleted file mode 100644
index 6022ec6903cedd7faf81879be8ff8f47a4d4ee63..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Gen7-gemma-2-9B/29e65163-3e59-4bfe-a950-60092cb3171f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-Gen7-gemma-2-9B/1762652580.614857",
- "retrieved_timestamp": "1762652580.614858",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-Gen7-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-Gen7-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16641289556155447
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5935242633580781
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33557046979865773
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40978125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4122340425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Max-Merge_02012025163610-gemma-2-9B/bfeb5972-e865-4892-b01b-0c92fdab79e9.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Max-Merge_02012025163610-gemma-2-9B/bfeb5972-e865-4892-b01b-0c92fdab79e9.json
deleted file mode 100644
index 0e2e8786d357247e09557b746269cb368f18628d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Max-Merge_02012025163610-gemma-2-9B/bfeb5972-e865-4892-b01b-0c92fdab79e9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-Max-Merge_02012025163610-gemma-2-9B/1762652580.6150799",
- "retrieved_timestamp": "1762652580.615081",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-Max-Merge_02012025163610-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-Max-Merge_02012025163610-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7907485471881275
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6142243374633075
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2212990936555891
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35151006711409394
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4228020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4395777925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge-gemma-2-9B/8025c7ed-3553-489f-8858-091d1ff81a15.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge-gemma-2-9B/8025c7ed-3553-489f-8858-091d1ff81a15.json
deleted file mode 100644
index fba2dc13cd0690565cb2e225fefbef73183b138c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge-gemma-2-9B/8025c7ed-3553-489f-8858-091d1ff81a15.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge-gemma-2-9B/1762652580.615297",
- "retrieved_timestamp": "1762652580.615297",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-Merge-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-Merge-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8035379459833243
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6118379158679297
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22054380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34815436241610737
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.425625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43617021276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge1-gemma-2-9B/0e6d9dcd-e9b7-4638-ac0a-d0600fbb27d8.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge1-gemma-2-9B/0e6d9dcd-e9b7-4638-ac0a-d0600fbb27d8.json
deleted file mode 100644
index 20019f265c68a1996ab721f261eb4572ea1cac0c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge1-gemma-2-9B/0e6d9dcd-e9b7-4638-ac0a-d0600fbb27d8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge1-gemma-2-9B/1762652580.615506",
- "retrieved_timestamp": "1762652580.615506",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-Merge1-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-Merge1-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7901490268044344
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6099997385328262
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22885196374622357
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35151006711409394
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4243854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43741688829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/b149c82e-0099-46f6-a302-0eac4127f418.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/b149c82e-0099-46f6-a302-0eac4127f418.json
deleted file mode 100644
index 9c3466b8e012c7f4979eaf59207c9312d66e0acd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/b149c82e-0099-46f6-a302-0eac4127f418.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge2-MU-gemma-2-MTg2MT1g2-9B/1762652580.615718",
- "retrieved_timestamp": "1762652580.615718",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-Merge2-MU-gemma-2-MTg2MT1g2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7955945779420825
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.60838922159878
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21827794561933533
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35067114093959734
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43222916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.437250664893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge2-gemma-2-9B/75c81dae-2bb9-4d60-94e2-61141c31ccbd.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge2-gemma-2-9B/75c81dae-2bb9-4d60-94e2-61141c31ccbd.json
deleted file mode 100644
index 483bd8027994e052734c659506bccd80a72989a4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge2-gemma-2-9B/75c81dae-2bb9-4d60-94e2-61141c31ccbd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge2-gemma-2-9B/1762652580.615932",
- "retrieved_timestamp": "1762652580.615933",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-Merge2-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-Merge2-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7877010775852515
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6106681877306871
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2348942598187311
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35067114093959734
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4216875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43816489361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge3-gemma-2-9B/c2bad77e-c0d0-4a43-8853-9363cc618603.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge3-gemma-2-9B/c2bad77e-c0d0-4a43-8853-9363cc618603.json
deleted file mode 100644
index bbb9c2c47598d2270f89283bb24dba550e3065c1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge3-gemma-2-9B/c2bad77e-c0d0-4a43-8853-9363cc618603.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge3-gemma-2-9B/1762652580.6161401",
- "retrieved_timestamp": "1762652580.616141",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-Merge3-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-Merge3-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7858526487497617
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6102112889343964
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22054380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.348993288590604
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42575
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4373337765957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge4-gemma-2-9B/7b515db9-e76c-495f-b4f8-a65b913f40e9.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge4-gemma-2-9B/7b515db9-e76c-495f-b4f8-a65b913f40e9.json
deleted file mode 100644
index 9b8f5be8ff8be9df3dd28c2d0361e74fbe678951..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge4-gemma-2-9B/7b515db9-e76c-495f-b4f8-a65b913f40e9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge4-gemma-2-9B/1762652580.616342",
- "retrieved_timestamp": "1762652580.616342",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-Merge4-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-Merge4-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7807317916461656
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6118218058684427
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21676737160120846
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3523489932885906
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42943749999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43899601063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge5-gemma-2-9B/f9e1d208-d1ab-4518-9b1b-1470af8bef12.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge5-gemma-2-9B/f9e1d208-d1ab-4518-9b1b-1470af8bef12.json
deleted file mode 100644
index a460e1d644d9f2e9c9b9a27a414a2ad4735754e7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge5-gemma-2-9B/f9e1d208-d1ab-4518-9b1b-1470af8bef12.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge5-gemma-2-9B/1762652580.616543",
- "retrieved_timestamp": "1762652580.616544",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-Merge5-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-Merge5-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7843787816327346
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6122674386670167
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21827794561933533
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35318791946308725
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42813541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4387466755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge6-gemma-2-9B/3c796c74-d79c-4c9f-a5ab-dee6c237bde1.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge6-gemma-2-9B/3c796c74-d79c-4c9f-a5ab-dee6c237bde1.json
deleted file mode 100644
index d86f6b21db1b0428bd52dcd1561a885ad1dc92f5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-Merge6-gemma-2-9B/3c796c74-d79c-4c9f-a5ab-dee6c237bde1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-Merge6-gemma-2-9B/1762652580.6167512",
- "retrieved_timestamp": "1762652580.6167512",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-Merge6-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-Merge6-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16946036516443036
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5949106849534558
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08006042296072508
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3288590604026846
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40978125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41148603723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-gemma-2-9B/061fc038-b3fd-4d5b-8ab7-7f3713ad9e55.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-gemma-2-9B/061fc038-b3fd-4d5b-8ab7-7f3713ad9e55.json
deleted file mode 100644
index 47a3fa2f1179da5eddb3217e43e7ffb3489a698d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT-gemma-2-9B/061fc038-b3fd-4d5b-8ab7-7f3713ad9e55.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT-gemma-2-9B/1762652580.616956",
- "retrieved_timestamp": "1762652580.616957",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7968434863938794
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6063604478633632
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2054380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34563758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40711458333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42237367021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen1-gemma-2-9B/b869eab0-f736-48ef-8870-b98636cc4da1.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen1-gemma-2-9B/b869eab0-f736-48ef-8870-b98636cc4da1.json
deleted file mode 100644
index 6755168ff6068bf86485747d348686b7dd9846f2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen1-gemma-2-9B/b869eab0-f736-48ef-8870-b98636cc4da1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen1-gemma-2-9B/1762652580.617173",
- "retrieved_timestamp": "1762652580.617174",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT1-Gen1-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT1-Gen1-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7974430067775724
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6117787046647335
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2243202416918429
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34395973154362414
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43095833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43758311170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen2-gemma-2-9B/2871c1f6-4010-48e4-8020-1c5024474934.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen2-gemma-2-9B/2871c1f6-4010-48e4-8020-1c5024474934.json
deleted file mode 100644
index 73d06534a6ad690d2ee3288a0c279e033a905252..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen2-gemma-2-9B/2871c1f6-4010-48e4-8020-1c5024474934.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen2-gemma-2-9B/1762652580.617375",
- "retrieved_timestamp": "1762652580.617376",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT1-Gen2-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT1-Gen2-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7983672211953173
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6095989894691557
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22507552870090636
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3523489932885906
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42835416666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43550531914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen3-gemma-2-9B/69b008dd-f8ad-49ce-9bca-fff2e2ce6b72.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen3-gemma-2-9B/69b008dd-f8ad-49ce-9bca-fff2e2ce6b72.json
deleted file mode 100644
index 293264853e285c9659b0716a50ec5fcca9388d26..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen3-gemma-2-9B/69b008dd-f8ad-49ce-9bca-fff2e2ce6b72.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen3-gemma-2-9B/1762652580.617578",
- "retrieved_timestamp": "1762652580.617579",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT1-Gen3-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT1-Gen3-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.795969139660545
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6101551392017761
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2243202416918429
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.348993288590604
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42432291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43492353723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen4-gemma-2-9B/e10f8a93-7131-446d-b792-d179f522a262.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen4-gemma-2-9B/e10f8a93-7131-446d-b792-d179f522a262.json
deleted file mode 100644
index b2730bf8e6b31d5f3641c7faf44d9277e9ca38ec..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen4-gemma-2-9B/e10f8a93-7131-446d-b792-d179f522a262.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen4-gemma-2-9B/1762652580.617781",
- "retrieved_timestamp": "1762652580.617782",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT1-Gen4-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT1-Gen4-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7941207108250552
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6057567677609054
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21601208459214502
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34731543624161076
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42311458333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42860704787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen5-IF-gemma-2-S2DMv1-9B/182a7558-c9f7-43a6-a928-d5d97e082a91.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen5-IF-gemma-2-S2DMv1-9B/182a7558-c9f7-43a6-a928-d5d97e082a91.json
deleted file mode 100644
index cf823339b764f4e2c911be85bdc4ad637853c364..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen5-IF-gemma-2-S2DMv1-9B/182a7558-c9f7-43a6-a928-d5d97e082a91.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen5-IF-gemma-2-S2DMv1-9B/1762652580.617982",
- "retrieved_timestamp": "1762652580.6179829",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT1-Gen5-IF-gemma-2-S2DMv1-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7929216700576691
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6000001533684681
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20317220543806647
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34395973154362414
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4244791666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42179188829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen5-gemma-2-9B/46f2caf1-29e8-4173-b2b2-e54e905e71d9.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen5-gemma-2-9B/46f2caf1-29e8-4173-b2b2-e54e905e71d9.json
deleted file mode 100644
index 54c5be383e476174fa0284d163fc489bc09a74b3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen5-gemma-2-9B/46f2caf1-29e8-4173-b2b2-e54e905e71d9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen5-gemma-2-9B/1762652580.618199",
- "retrieved_timestamp": "1762652580.6182",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT1-Gen5-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT1-Gen5-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7794828831943688
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6017455017631886
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20770392749244712
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3464765100671141
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41914583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42220744680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen6-gemma-2-9B/fcf4087e-9d89-4e8a-a817-6c9092445208.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen6-gemma-2-9B/fcf4087e-9d89-4e8a-a817-6c9092445208.json
deleted file mode 100644
index a585f0f127744a9cae45fb22d506fd0f75ee5aff..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen6-gemma-2-9B/fcf4087e-9d89-4e8a-a817-6c9092445208.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen6-gemma-2-9B/1762652580.618452",
- "retrieved_timestamp": "1762652580.618453",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT1-Gen6-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT1-Gen6-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16336542595867853
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5943545352208355
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08081570996978851
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40444791666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4133144946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen7-gemma-2-9B/5b8bdeea-19cf-41c0-890a-55ae1b740e75.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen7-gemma-2-9B/5b8bdeea-19cf-41c0-890a-55ae1b740e75.json
deleted file mode 100644
index 7c6afdc0ddaf4a0716200ac959cd0735fe2cde97..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Gen7-gemma-2-9B/5b8bdeea-19cf-41c0-890a-55ae1b740e75.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT1-Gen7-gemma-2-9B/1762652580.6186602",
- "retrieved_timestamp": "1762652580.6186612",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT1-Gen7-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT1-Gen7-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16336542595867853
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5937953240176393
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08308157099697885
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41111458333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4144780585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Max-Merge_02012025163610-gemma-2-9B/01fcc284-cedc-48b7-bc21-b8ec6dd53d3c.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Max-Merge_02012025163610-gemma-2-9B/01fcc284-cedc-48b7-bc21-b8ec6dd53d3c.json
deleted file mode 100644
index 50563e4b84abc64780fd7024c3897cc92ec3d8e5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-Max-Merge_02012025163610-gemma-2-9B/01fcc284-cedc-48b7-bc21-b8ec6dd53d3c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT1-Max-Merge_02012025163610-gemma-2-9B/1762652580.618859",
- "retrieved_timestamp": "1762652580.61886",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT1-Max-Merge_02012025163610-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7928718023732585
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6122674386670167
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22280966767371602
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3548657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4255
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43816489361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-gemma-2-9B/17cda965-9f4b-411c-977f-1fe3238f527f.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-gemma-2-9B/17cda965-9f4b-411c-977f-1fe3238f527f.json
deleted file mode 100644
index 6de14f5428d9ad2f8683cb0155fb4007bc66dfef..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT1-gemma-2-9B/17cda965-9f4b-411c-977f-1fe3238f527f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT1-gemma-2-9B/1762652580.619083",
- "retrieved_timestamp": "1762652580.6190841",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT1-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT1-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7946703635243377
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6108745950756924
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22356495468277945
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34563758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43222916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4357546542553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen1-gemma-2-9B/e6c0f96c-6189-4ed1-bf68-e762249170e7.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen1-gemma-2-9B/e6c0f96c-6189-4ed1-bf68-e762249170e7.json
deleted file mode 100644
index c7da51e4681cc790edf66e702a54071578fb7461..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen1-gemma-2-9B/e6c0f96c-6189-4ed1-bf68-e762249170e7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen1-gemma-2-9B/1762652580.619495",
- "retrieved_timestamp": "1762652580.619499",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT2-Gen1-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT2-Gen1-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7855778224001206
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6100802027920743
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2212990936555891
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34312080536912754
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42432291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4376662234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen2-gemma-2-9B/556a83e2-9b7c-432e-99d5-804da880dfc6.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen2-gemma-2-9B/556a83e2-9b7c-432e-99d5-804da880dfc6.json
deleted file mode 100644
index 11b5d77685bcff6186163f76731cf0cc5bd52090..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen2-gemma-2-9B/556a83e2-9b7c-432e-99d5-804da880dfc6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen2-gemma-2-9B/1762652580.6198761",
- "retrieved_timestamp": "1762652580.619877",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT2-Gen2-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT2-Gen2-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7889001183526376
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6092917531936446
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21827794561933533
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3464765100671141
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42702083333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43882978723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen3-gemma-2-9B/1aa85069-5409-4c32-91d5-1f417be4e465.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen3-gemma-2-9B/1aa85069-5409-4c32-91d5-1f417be4e465.json
deleted file mode 100644
index 825d5351f9152669313b00a45b7afaf616baa667..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen3-gemma-2-9B/1aa85069-5409-4c32-91d5-1f417be4e465.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen3-gemma-2-9B/1762652580.620111",
- "retrieved_timestamp": "1762652580.620112",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT2-Gen3-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT2-Gen3-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7810066179958066
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6104772065373926
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2107250755287009
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3464765100671141
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4230833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43741688829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen4-gemma-2-9B/eb55e4d5-dde4-4349-b8aa-9297604cedf0.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen4-gemma-2-9B/eb55e4d5-dde4-4349-b8aa-9297604cedf0.json
deleted file mode 100644
index eb33825216f108c759949ac23058b90e703e5b7e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen4-gemma-2-9B/eb55e4d5-dde4-4349-b8aa-9297604cedf0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen4-gemma-2-9B/1762652580.620331",
- "retrieved_timestamp": "1762652580.620331",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT2-Gen4-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT2-Gen4-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7895993741051521
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.609655139201776
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22356495468277945
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34563758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41254166666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43209773936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen5-gemma-2-9B/3f7eb2b4-8dfb-4bf5-a462-0c11ccbae935.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen5-gemma-2-9B/3f7eb2b4-8dfb-4bf5-a462-0c11ccbae935.json
deleted file mode 100644
index fbd77c18bc6398d5854d8c712bd20ae1aac20203..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen5-gemma-2-9B/3f7eb2b4-8dfb-4bf5-a462-0c11ccbae935.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen5-gemma-2-9B/1762652580.6205592",
- "retrieved_timestamp": "1762652580.6205592",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT2-Gen5-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT2-Gen5-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7749116787900548
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6063933817527739
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2107250755287009
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35151006711409394
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42441666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43018617021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen6-gemma-2-9B/35e1f76a-96d6-42af-a51b-b1b453536723.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen6-gemma-2-9B/35e1f76a-96d6-42af-a51b-b1b453536723.json
deleted file mode 100644
index a54c87074ef2cb5cc72e7b20a9f30f5e885ea28a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen6-gemma-2-9B/35e1f76a-96d6-42af-a51b-b1b453536723.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen6-gemma-2-9B/1762652580.620769",
- "retrieved_timestamp": "1762652580.620769",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT2-Gen6-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT2-Gen6-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16641289556155447
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.595964957637105
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08459214501510574
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41371874999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42096077127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen7-gemma-2-9B/4b9e66cf-0ddb-4878-8800-2bc05dec750a.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen7-gemma-2-9B/4b9e66cf-0ddb-4878-8800-2bc05dec750a.json
deleted file mode 100644
index 9a38a2b480c041244aaf95348c60a18683bb77bd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Gen7-gemma-2-9B/4b9e66cf-0ddb-4878-8800-2bc05dec750a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT2-Gen7-gemma-2-9B/1762652580.621203",
- "retrieved_timestamp": "1762652580.621205",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT2-Gen7-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT2-Gen7-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17615482475387528
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6078922830693557
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10196374622356495
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3548657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42032291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4311003989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Max-Merge_02012025163610-gemma-2-9B/2144960d-f674-45bd-9509-3cf711dc697b.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Max-Merge_02012025163610-gemma-2-9B/2144960d-f674-45bd-9509-3cf711dc697b.json
deleted file mode 100644
index 142b173c58bb016efd5739c9b9b8e83a01ebf8a3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-Max-Merge_02012025163610-gemma-2-9B/2144960d-f674-45bd-9509-3cf711dc697b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT2-Max-Merge_02012025163610-gemma-2-9B/1762652580.6214652",
- "retrieved_timestamp": "1762652580.6214678",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT2-Max-Merge_02012025163610-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7901490268044344
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6108461203950706
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2243202416918429
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35151006711409394
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42283333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4390791223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-gemma-2-9B/0644b140-506f-4c7a-ba59-50ab48fad799.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-gemma-2-9B/0644b140-506f-4c7a-ba59-50ab48fad799.json
deleted file mode 100644
index 4541304e5948ecc7c038683132ceaf5bd3923f1b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT2-gemma-2-9B/0644b140-506f-4c7a-ba59-50ab48fad799.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT2-gemma-2-9B/1762652580.6217349",
- "retrieved_timestamp": "1762652580.621736",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT2-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT2-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7885754243185858
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.611511004530543
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2212990936555891
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34731543624161076
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42165625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43683510638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen1-gemma-2-9B/1964f25a-d5b2-467a-a30d-9338082bdcfb.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen1-gemma-2-9B/1964f25a-d5b2-467a-a30d-9338082bdcfb.json
deleted file mode 100644
index 13c215397f3584c93908bfd6a228f0e9f6567363..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen1-gemma-2-9B/1964f25a-d5b2-467a-a30d-9338082bdcfb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen1-gemma-2-9B/1762652580.6219652",
- "retrieved_timestamp": "1762652580.6219661",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT3-Gen1-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT3-Gen1-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7837792612490415
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6106760932030332
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21450151057401812
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3464765100671141
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41511458333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43267952127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen2-gemma-2-9B/55315256-9b4d-4dbd-bc53-7ec384e0fdca.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen2-gemma-2-9B/55315256-9b4d-4dbd-bc53-7ec384e0fdca.json
deleted file mode 100644
index f1c6bae89fa84da136ce7d423a2b1c2138d0a949..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen2-gemma-2-9B/55315256-9b4d-4dbd-bc53-7ec384e0fdca.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen2-gemma-2-9B/1762652580.622196",
- "retrieved_timestamp": "1762652580.622197",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT3-Gen2-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT3-Gen2-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7843289139483238
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6091473194676166
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22356495468277945
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3573825503355705
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41111458333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43326130319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen3-gemma-2-9B/71710546-99cb-4180-9454-1e77696fccf3.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen3-gemma-2-9B/71710546-99cb-4180-9454-1e77696fccf3.json
deleted file mode 100644
index 12159a64f6971ab14dfb1f0000c16a6b103abfbc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen3-gemma-2-9B/71710546-99cb-4180-9454-1e77696fccf3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen3-gemma-2-9B/1762652580.622438",
- "retrieved_timestamp": "1762652580.622439",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT3-Gen3-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT3-Gen3-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7856276900845313
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6088892215987798
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21525679758308158
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35151006711409394
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42575
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4302692819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen4-gemma-2-9B/96b38b17-8c70-4ecf-beb5-8e6ed84942ac.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen4-gemma-2-9B/96b38b17-8c70-4ecf-beb5-8e6ed84942ac.json
deleted file mode 100644
index c229f051a38c4c65dad9ab5456c675d24b4b8e65..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen4-gemma-2-9B/96b38b17-8c70-4ecf-beb5-8e6ed84942ac.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen4-gemma-2-9B/1762652580.6226869",
- "retrieved_timestamp": "1762652580.622689",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT3-Gen4-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT3-Gen4-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7737126380226687
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6100843629460684
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20619335347432025
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34731543624161076
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4476354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4387466755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen5-gemma-2-9B/53dc50c8-fa89-4d31-92d6-f8b02543e272.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen5-gemma-2-9B/53dc50c8-fa89-4d31-92d6-f8b02543e272.json
deleted file mode 100644
index 479c08a33eb50b64449d67c491106b8088b32dc5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen5-gemma-2-9B/53dc50c8-fa89-4d31-92d6-f8b02543e272.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen5-gemma-2-9B/1762652580.622956",
- "retrieved_timestamp": "1762652580.622956",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT3-Gen5-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT3-Gen5-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7990166092634211
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6098615465467813
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22658610271903323
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35318791946308725
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41911458333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43168218085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen5-gemma-2-9B_v1/95fe9cce-c93d-47e3-a053-defe922abefa.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen5-gemma-2-9B_v1/95fe9cce-c93d-47e3-a053-defe922abefa.json
deleted file mode 100644
index 27a4fac29f707facfff85cfa3255ba265cbbcdda..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen5-gemma-2-9B_v1/95fe9cce-c93d-47e3-a053-defe922abefa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen5-gemma-2-9B_v1/1762652580.623179",
- "retrieved_timestamp": "1762652580.623179",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT3-Gen5-gemma-2-9B_v1",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT3-Gen5-gemma-2-9B_v1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7996161296471141
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6113330718661595
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22280966767371602
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.348993288590604
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4203854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4359208776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen6-gemma-2-9B/9f093c1a-eabc-4ee3-9e43-9ac0bc3afa08.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen6-gemma-2-9B/9f093c1a-eabc-4ee3-9e43-9ac0bc3afa08.json
deleted file mode 100644
index 8904c0b51c21c158b184837c9225e0cddb37cf65..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Gen6-gemma-2-9B/9f093c1a-eabc-4ee3-9e43-9ac0bc3afa08.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT3-Gen6-gemma-2-9B/1762652580.623395",
- "retrieved_timestamp": "1762652580.623395",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT3-Gen6-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT3-Gen6-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17615482475387528
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6020072592121909
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08836858006042296
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34312080536912754
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4125729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41023936170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Max-Merge_02012025163610-gemma-2-9B/42e21a24-7c3c-4e65-ad6e-0b18f6c048eb.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Max-Merge_02012025163610-gemma-2-9B/42e21a24-7c3c-4e65-ad6e-0b18f6c048eb.json
deleted file mode 100644
index c97bd6912f411ca7a19fe50f4172341baf8f32da..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-Max-Merge_02012025163610-gemma-2-9B/42e21a24-7c3c-4e65-ad6e-0b18f6c048eb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT3-Max-Merge_02012025163610-gemma-2-9B/1762652580.623601",
- "retrieved_timestamp": "1762652580.623602",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT3-Max-Merge_02012025163610-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17615482475387528
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6123461203950705
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10120845921450151
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35067114093959734
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42546875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4389128989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-gemma-2-9B/0b8f178b-9980-4250-bc82-66facb367eb8.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-gemma-2-9B/0b8f178b-9980-4250-bc82-66facb367eb8.json
deleted file mode 100644
index 3e69d166cbb64c38302d56c07bf6beb68e0e2fdf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT3-gemma-2-9B/0b8f178b-9980-4250-bc82-66facb367eb8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT3-gemma-2-9B/1762652580.623819",
- "retrieved_timestamp": "1762652580.62382",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT3-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT3-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7786085364610345
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.61307842026088
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21676737160120846
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3447986577181208
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4242916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43267952127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen1-gemma-2-9B/6e5b6be6-cc1d-4a03-8e5e-eeede4ee4298.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen1-gemma-2-9B/6e5b6be6-cc1d-4a03-8e5e-eeede4ee4298.json
deleted file mode 100644
index 8b337d546e3b32a817cef7054b3d82d4e7f75a1f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen1-gemma-2-9B/6e5b6be6-cc1d-4a03-8e5e-eeede4ee4298.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen1-gemma-2-9B/1762652580.624031",
- "retrieved_timestamp": "1762652580.624032",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT4-Gen1-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT4-Gen1-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7894996387363307
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6093827996028333
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21978851963746224
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34395973154362414
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43222916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4389128989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen2-gemma-2-9B/e7f0b28a-32c6-4faf-9cb4-c2ee4a075135.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen2-gemma-2-9B/e7f0b28a-32c6-4faf-9cb4-c2ee4a075135.json
deleted file mode 100644
index 204c5eadeabfa733f6b83f863a824c582b913fa1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen2-gemma-2-9B/e7f0b28a-32c6-4faf-9cb4-c2ee4a075135.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen2-gemma-2-9B/1762652580.6242292",
- "retrieved_timestamp": "1762652580.62423",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT4-Gen2-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT4-Gen2-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8050616807847621
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6108348543973539
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2326283987915408
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34563758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42565625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4367519946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen3-gemma-2-9B/b84ca7e1-4746-449a-841f-fcfd71774104.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen3-gemma-2-9B/b84ca7e1-4746-449a-841f-fcfd71774104.json
deleted file mode 100644
index 9f3bd3e1301fa77bb34e603f39d399b0f6c6d485..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen3-gemma-2-9B/b84ca7e1-4746-449a-841f-fcfd71774104.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen3-gemma-2-9B/1762652580.624489",
- "retrieved_timestamp": "1762652580.62449",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT4-Gen3-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT4-Gen3-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7840540875986826
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6087112889343964
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2190332326283988
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34395973154362414
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42432291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4380817819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen4-gemma-2-9B/b38dc953-12fb-41aa-a887-d9a30ff1799a.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen4-gemma-2-9B/b38dc953-12fb-41aa-a887-d9a30ff1799a.json
deleted file mode 100644
index 638b932fcb87ff65588e4077aebda1fdbb67b489..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen4-gemma-2-9B/b38dc953-12fb-41aa-a887-d9a30ff1799a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen4-gemma-2-9B/1762652580.6246998",
- "retrieved_timestamp": "1762652580.624701",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT4-Gen4-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT4-Gen4-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7874262512356104
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6076031496231499
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21450151057401812
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3523489932885906
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42435416666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4323470744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen5-gemma-2-9B/4a35f213-f9b7-40c5-b164-722f6b4ee933.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen5-gemma-2-9B/4a35f213-f9b7-40c5-b164-722f6b4ee933.json
deleted file mode 100644
index 01a571449b23663ce9d9724bd9f2bda0fb57d458..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Gen5-gemma-2-9B/4a35f213-f9b7-40c5-b164-722f6b4ee933.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT4-Gen5-gemma-2-9B/1762652580.6249092",
- "retrieved_timestamp": "1762652580.62491",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT4-Gen5-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT4-Gen5-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7788833628106757
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6106664051994928
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22658610271903323
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3565436241610738
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42683333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43841422872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Max-Merge_02012025163610-gemma-2-9B/ae4224f6-36e8-48e2-a0bf-a79299c365ad.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Max-Merge_02012025163610-gemma-2-9B/ae4224f6-36e8-48e2-a0bf-a79299c365ad.json
deleted file mode 100644
index 17a26f90b30969cf5d0e2dfb99be53b19f6dd0d9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-Max-Merge_02012025163610-gemma-2-9B/ae4224f6-36e8-48e2-a0bf-a79299c365ad.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT4-Max-Merge_02012025163610-gemma-2-9B/1762652580.625107",
- "retrieved_timestamp": "1762652580.625107",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT4-Max-Merge_02012025163610-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1770790391716202
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6120127870617372
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09516616314199396
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35151006711409394
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4228020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4390791223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-gemma-2-9B/a312ee46-fd2f-4a0d-a778-7e235910a147.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-gemma-2-9B/a312ee46-fd2f-4a0d-a778-7e235910a147.json
deleted file mode 100644
index 3bed200d498786bac68919561587ff0472bfb6b0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT4-gemma-2-9B/a312ee46-fd2f-4a0d-a778-7e235910a147.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT4-gemma-2-9B/1762652580.62533",
- "retrieved_timestamp": "1762652580.625331",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT4-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT4-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7761605872418517
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.607313601341302
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43092708333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43658577127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen1-gemma-2-9B/b311d3f4-6eda-4053-91d2-416c4d796c6d.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen1-gemma-2-9B/b311d3f4-6eda-4053-91d2-416c4d796c6d.json
deleted file mode 100644
index e8cc60221a8730987113831e512d3af82dcc3aca..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen1-gemma-2-9B/b311d3f4-6eda-4053-91d2-416c4d796c6d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen1-gemma-2-9B/1762652580.625538",
- "retrieved_timestamp": "1762652580.625539",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT5-Gen1-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT5-Gen1-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7831298731809377
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6110476837383056
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2212990936555891
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34731543624161076
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4203854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43683510638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen2-gemma-2-9B/d59d00da-e88f-4d1a-9c47-538020ae0114.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen2-gemma-2-9B/d59d00da-e88f-4d1a-9c47-538020ae0114.json
deleted file mode 100644
index d5d80e8e669c265c0fb7d2a4e909f89fb1b09411..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen2-gemma-2-9B/d59d00da-e88f-4d1a-9c47-538020ae0114.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen2-gemma-2-9B/1762652580.625738",
- "retrieved_timestamp": "1762652580.625739",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT5-Gen2-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT5-Gen2-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7962439660101863
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.610541261742359
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22054380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35151006711409394
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41629166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4379155585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen3-gemma-2-9B/1ff959c7-3477-40e5-8460-971337adc788.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen3-gemma-2-9B/1ff959c7-3477-40e5-8460-971337adc788.json
deleted file mode 100644
index 585f25eb300ef1bfb106ebd8fda0c6b1a57183de..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen3-gemma-2-9B/1ff959c7-3477-40e5-8460-971337adc788.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen3-gemma-2-9B/1762652580.625941",
- "retrieved_timestamp": "1762652580.625942",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT5-Gen3-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT5-Gen3-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7825303527972447
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6090494662695
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21676737160120846
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35151006711409394
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42305208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4375
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen4-gemma-2-9B/6cbd7c31-df0a-4920-9c23-be53f107698e.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen4-gemma-2-9B/6cbd7c31-df0a-4920-9c23-be53f107698e.json
deleted file mode 100644
index b6a3c30ec2b6787f2a02d9131a9e061abaf7598b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen4-gemma-2-9B/6cbd7c31-df0a-4920-9c23-be53f107698e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen4-gemma-2-9B/1762652580.62615",
- "retrieved_timestamp": "1762652580.6261508",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT5-Gen4-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT5-Gen4-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7834545672149895
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6131056160021203
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2243202416918429
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35318791946308725
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42283333333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4396609042553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen5-gemma-2-9B/b4ca4df6-2631-4ba3-bb55-8eadec5dd348.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen5-gemma-2-9B/b4ca4df6-2631-4ba3-bb55-8eadec5dd348.json
deleted file mode 100644
index cfbcae7705facc51b97b209c48d374edb5357991..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Gen5-gemma-2-9B/b4ca4df6-2631-4ba3-bb55-8eadec5dd348.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT5-Gen5-gemma-2-9B/1762652580.6263602",
- "retrieved_timestamp": "1762652580.6263611",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT5-Gen5-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT5-Gen5-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7947202312087482
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6111664051994928
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2258308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34815436241610737
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41911458333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43292885638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Max-Merge_02012025163610-gemma-2-9B/6737b327-bd1c-4eee-a461-af685edcd7b5.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Max-Merge_02012025163610-gemma-2-9B/6737b327-bd1c-4eee-a461-af685edcd7b5.json
deleted file mode 100644
index 6abf38344157ae3cdbbc723b23ac92c183bdd71e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-Max-Merge_02012025163610-gemma-2-9B/6737b327-bd1c-4eee-a461-af685edcd7b5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT5-Max-Merge_02012025163610-gemma-2-9B/1762652580.62657",
- "retrieved_timestamp": "1762652580.62657",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT5-Max-Merge_02012025163610-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17615482475387528
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6126794537284038
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09818731117824774
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35151006711409394
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4227708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43899601063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-gemma-2-9B/dd306da8-60aa-4022-8d04-1942fd19bc0b.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-gemma-2-9B/dd306da8-60aa-4022-8d04-1942fd19bc0b.json
deleted file mode 100644
index d8a5be6e55c51c72e9d6a78bbc84cd85d9637882..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MT5-gemma-2-9B/dd306da8-60aa-4022-8d04-1942fd19bc0b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MT5-gemma-2-9B/1762652580.6267788",
- "retrieved_timestamp": "1762652580.6267798",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MT5-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MT5-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8047868544351211
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6112225549321132
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2258308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34312080536912754
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4203854166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4366688829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MTM-Merge-gemma-2-9B/e0354dac-3ad8-4342-92a9-be0182051cac.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MTM-Merge-gemma-2-9B/e0354dac-3ad8-4342-92a9-be0182051cac.json
deleted file mode 100644
index e95e65d61467bc8f50155ed711afb1dc34d71ee5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MTM-Merge-gemma-2-9B/e0354dac-3ad8-4342-92a9-be0182051cac.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MTM-Merge-gemma-2-9B/1762652580.626984",
- "retrieved_timestamp": "1762652580.626985",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MTM-Merge-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MTM-Merge-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7798075772284205
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6133348543973538
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2175226586102719
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3548657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4267708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43882978723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_MTMaMe-Merge_02012025163610-gemma-2-9B/b1a8ede3-2f27-4825-a413-e1772743b7c6.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_MTMaMe-Merge_02012025163610-gemma-2-9B/b1a8ede3-2f27-4825-a413-e1772743b7c6.json
deleted file mode 100644
index 98933922bb9611c203075e28a2f1b3af63622ea1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_MTMaMe-Merge_02012025163610-gemma-2-9B/b1a8ede3-2f27-4825-a413-e1772743b7c6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_MTMaMe-Merge_02012025163610-gemma-2-9B/1762652580.627192",
- "retrieved_timestamp": "1762652580.627192",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/MTMaMe-Merge_02012025163610-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17860277397305815
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6116794537284039
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09592145015105741
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3523489932885906
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42410416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43816489361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_Rv0.4DMv1t0.25-gemma-2-9B/522e1145-3f25-4b5d-9b6a-7ad0047b2da5.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_Rv0.4DMv1t0.25-gemma-2-9B/522e1145-3f25-4b5d-9b6a-7ad0047b2da5.json
deleted file mode 100644
index 0a77c0acf5ba5313b110426b40c8320340d64575..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_Rv0.4DMv1t0.25-gemma-2-9B/522e1145-3f25-4b5d-9b6a-7ad0047b2da5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_Rv0.4DMv1t0.25-gemma-2-9B/1762652580.627404",
- "retrieved_timestamp": "1762652580.627404",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/Rv0.4DMv1t0.25-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/Rv0.4DMv1t0.25-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7496575752337131
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6069712638522043
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2258308157099698
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34563758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43092708333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44007646276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/64790745-5edc-49d9-8111-822d54518b58.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/64790745-5edc-49d9-8111-822d54518b58.json
deleted file mode 100644
index 7c86ae516ed2ab35f56294ef89fc7eb36c500f21..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/64790745-5edc-49d9-8111-822d54518b58.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_Rv0.4DMv1t0.25Tt0.25-gemma-2-9B/1762652580.627618",
- "retrieved_timestamp": "1762652580.627619",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/Rv0.4DMv1t0.25Tt0.25-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7646200968984517
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6097862253440982
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20694864048338368
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3422818791946309
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4282916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43467420212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_Rv0.4MT4g2-gemma-2-9B/7e232332-cf13-4127-be18-1311921931e6.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_Rv0.4MT4g2-gemma-2-9B/7e232332-cf13-4127-be18-1311921931e6.json
deleted file mode 100644
index 5b83fe416ebb20cef2bda3afd8af40227488da1a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_Rv0.4MT4g2-gemma-2-9B/7e232332-cf13-4127-be18-1311921931e6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_Rv0.4MT4g2-gemma-2-9B/1762652580.627839",
- "retrieved_timestamp": "1762652580.62784",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/Rv0.4MT4g2-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/Rv0.4MT4g2-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7320221456845614
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.604119644415618
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19486404833836857
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35318791946308725
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4230833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44173869680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_T31122024203920-gemma-2-9B/f1312aef-339c-487a-b0fa-1bf4a77f0910.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_T31122024203920-gemma-2-9B/f1312aef-339c-487a-b0fa-1bf4a77f0910.json
deleted file mode 100644
index 85226854a9b88527647c89b72890b9619f33d583..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_T31122024203920-gemma-2-9B/f1312aef-339c-487a-b0fa-1bf4a77f0910.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_T31122024203920-gemma-2-9B/1762652580.628056",
- "retrieved_timestamp": "1762652580.628057",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/T31122024203920-gemma-2-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/T31122024203920-gemma-2-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7676176988169169
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6095634089448112
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2054380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35067114093959734
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4321979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.437250664893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_Test01012025155054t0.5_gemma-2/73f07833-1d35-484f-8fe3-57f4c27e1277.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_Test01012025155054t0.5_gemma-2/73f07833-1d35-484f-8fe3-57f4c27e1277.json
deleted file mode 100644
index b85e9826519a5e54182ebe46e3631542513aba3e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_Test01012025155054t0.5_gemma-2/73f07833-1d35-484f-8fe3-57f4c27e1277.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_Test01012025155054t0.5_gemma-2/1762652580.628514",
- "retrieved_timestamp": "1762652580.628514",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/Test01012025155054t0.5_gemma-2",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/Test01012025155054t0.5_gemma-2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1555229014570229
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28295044895258115
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24161073825503357
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36702083333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10904255319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 3.817
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_gemma-2-S2MTM-9B/e0eb1bbf-923b-4bee-8390-288c21607e0e.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_gemma-2-S2MTM-9B/e0eb1bbf-923b-4bee-8390-288c21607e0e.json
deleted file mode 100644
index f85ad77edfb0163df1f60922be5c5339fa9e4d55..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_gemma-2-S2MTM-9B/e0eb1bbf-923b-4bee-8390-288c21607e0e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_gemma-2-S2MTM-9B/1762652580.628712",
- "retrieved_timestamp": "1762652580.628713",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/gemma-2-S2MTM-9B",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/gemma-2-S2MTM-9B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7822555264476034
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6060836790982922
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20468277945619334
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34563758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42184375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4296875
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/b9ce6ed3-132a-44ed-9efc-dbfcc83d6799.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/b9ce6ed3-132a-44ed-9efc-dbfcc83d6799.json
deleted file mode 100644
index 8a8858e6195ee2fa4a9b453da2dc222551575f10..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/b9ce6ed3-132a-44ed-9efc-dbfcc83d6799.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25/1762652580.630025",
- "retrieved_timestamp": "1762652580.630029",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.25"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7706651684197928
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6075432245295168
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21450151057401812
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34312080536912754
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43226041666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4399933510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/a2b9a953-31e2-4a6f-8005-993e1133246e.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/a2b9a953-31e2-4a6f-8005-993e1133246e.json
deleted file mode 100644
index 8e7fd6e65872296a6758e184799b058c8fd7037b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/a2b9a953-31e2-4a6f-8005-993e1133246e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75/1762652580.630381",
- "retrieved_timestamp": "1762652580.630382",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1-t0.75"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7208063493752133
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5995203934792884
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20166163141993956
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3498322147651007
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3951145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4140625
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1/6850eb56-9f2c-4d4f-a82a-29e24b81b8b3.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1/6850eb56-9f2c-4d4f-a82a-29e24b81b8b3.json
deleted file mode 100644
index 99d5c29b13c8f08f91408acea20bed447b2322fb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1/6850eb56-9f2c-4d4f-a82a-29e24b81b8b3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.1/1762652580.628911",
- "retrieved_timestamp": "1762652580.6289122",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7648949232480928
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6074511952177571
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2280966767371601
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3498322147651007
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41362499999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43209773936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.2/7f429355-b60b-4298-8eb0-a072a80898d7.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.2/7f429355-b60b-4298-8eb0-a072a80898d7.json
deleted file mode 100644
index 178bd494c08b05363f4268119f9e4e4af7ef9c11..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.2/7f429355-b60b-4298-8eb0-a072a80898d7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ataraxy-9B-v0.2/1762652580.6306539",
- "retrieved_timestamp": "1762652580.6306539",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/recoilme-gemma-2-Ataraxy-9B-v0.2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.759999024809727
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6066260664115647
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22280966767371602
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34815436241610737
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4109583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43226396276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/774a3b0c-acae-4ad2-a2a6-42c30e1db7c0.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/774a3b0c-acae-4ad2-a2a6-42c30e1db7c0.json
deleted file mode 100644
index 217203e5fbefed2dd4a4df6480be7d2c97758580..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/774a3b0c-acae-4ad2-a2a6-42c30e1db7c0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1/1762652580.630864",
- "retrieved_timestamp": "1762652580.6308649",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/recoilme-gemma-2-Gutenberg-Doppel-9B-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7615227596111651
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6098779556010631
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20996978851963746
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3414429530201342
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43102083333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4315159574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ifable-9B-v0.1/e8502d8d-87bd-444c-b41b-7f8d4eb15b29.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ifable-9B-v0.1/e8502d8d-87bd-444c-b41b-7f8d4eb15b29.json
deleted file mode 100644
index b3e5918a4c0ad06f07a6a0c25b9fd923f36c5427..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-Ifable-9B-v0.1/e8502d8d-87bd-444c-b41b-7f8d4eb15b29.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-Ifable-9B-v0.1/1762652580.6310751",
- "retrieved_timestamp": "1762652580.631076",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/recoilme-gemma-2-Ifable-9B-v0.1",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/recoilme-gemma-2-Ifable-9B-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7943955371746965
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6064399292200404
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22054380664652568
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35151006711409394
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42022916666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4323470744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/735bed66-1e83-4647-b730-14f0d571d597.json b/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/735bed66-1e83-4647-b730-14f0d571d597.json
deleted file mode 100644
index 79b952a8e42d601fee9920d9a3348aba70391377..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/google/zelk12_recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/735bed66-1e83-4647-b730-14f0d571d597.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/zelk12_recoilme-gemma-2-psy10k-mental_healt-9B-v0.1/1762652580.631496",
- "retrieved_timestamp": "1762652580.631499",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1",
- "developer": "google",
- "inference_platform": "unknown",
- "id": "zelk12/recoilme-gemma-2-psy10k-mental_healt-9B-v0.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.744536718130117
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.597759349920723
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18882175226586104
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34395973154362414
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42946875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41805186170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Gemma2ForCausalLM",
- "params_billions": 10.159
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/goulue5/goulue5_merging_LLM/a7fb7d77-93c3-41c8-a85a-692953dcd2c6.json b/leaderboard_data/HFOpenLLMv2/goulue5/goulue5_merging_LLM/a7fb7d77-93c3-41c8-a85a-692953dcd2c6.json
deleted file mode 100644
index 0f0b83177188d2b05d04f09f30ed014011216677..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/goulue5/goulue5_merging_LLM/a7fb7d77-93c3-41c8-a85a-692953dcd2c6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/goulue5_merging_LLM/1762652580.1806688",
- "retrieved_timestamp": "1762652580.18067",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "goulue5/merging_LLM",
- "developer": "goulue5",
- "inference_platform": "unknown",
- "id": "goulue5/merging_LLM"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32326006108237254
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4216498611590102
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09667673716012085
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43328125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29579454787234044
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/gradientai/gradientai_Llama-3-8B-Instruct-Gradient-1048k/79d366fc-e21c-4e5e-bb94-8d221d9df715.json b/leaderboard_data/HFOpenLLMv2/gradientai/gradientai_Llama-3-8B-Instruct-Gradient-1048k/79d366fc-e21c-4e5e-bb94-8d221d9df715.json
deleted file mode 100644
index 5f7b0332686c1a79bec184822fed25a8f429fd12..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/gradientai/gradientai_Llama-3-8B-Instruct-Gradient-1048k/79d366fc-e21c-4e5e-bb94-8d221d9df715.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/gradientai_Llama-3-8B-Instruct-Gradient-1048k/1762652580.181334",
- "retrieved_timestamp": "1762652580.181335",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "gradientai/Llama-3-8B-Instruct-Gradient-1048k",
- "developer": "gradientai",
- "inference_platform": "unknown",
- "id": "gradientai/Llama-3-8B-Instruct-Gradient-1048k"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4455588948434598
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4345903107069573
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05362537764350453
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42975
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29404920212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/6b615d1d-7dab-4414-88a2-72fff1b5fce7.json b/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/6b615d1d-7dab-4414-88a2-72fff1b5fce7.json
deleted file mode 100644
index 2f35b0662ac165a690964cb966b7054a5bbdc9fd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/6b615d1d-7dab-4414-88a2-72fff1b5fce7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/grimjim_Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge/1762652580.1827798",
- "retrieved_timestamp": "1762652580.182781",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge",
- "developer": "grimjim",
- "inference_platform": "unknown",
- "id": "grimjim/Llama-3-Instruct-8B-SPPO-Iter3-SimPO-merge"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42712447417297217
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4961694535006833
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09969788519637462
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40432291666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3625332446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/251c7560-4672-44a6-82df-2b8ce9a99a5e.json b/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/251c7560-4672-44a6-82df-2b8ce9a99a5e.json
deleted file mode 100644
index 0db18b7a14925f298ef0b17168cd168275c5abc5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/251c7560-4672-44a6-82df-2b8ce9a99a5e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/grimjim_Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge/1762652580.183053",
- "retrieved_timestamp": "1762652580.183053",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge",
- "developer": "grimjim",
- "inference_platform": "unknown",
- "id": "grimjim/Llama-3-Instruct-8B-SimPO-SPPO-Iter3-merge"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6805897241541332
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5021734091176594
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38851041666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3684341755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3.1-8B-Instruct-abliterated_via_adapter/377105ce-c655-47fe-a565-71a4de8c3683.json b/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3.1-8B-Instruct-abliterated_via_adapter/377105ce-c655-47fe-a565-71a4de8c3683.json
deleted file mode 100644
index 581c422f97108f05feaa103a7b29ff7e6755a394..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3.1-8B-Instruct-abliterated_via_adapter/377105ce-c655-47fe-a565-71a4de8c3683.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/grimjim_Llama-3.1-8B-Instruct-abliterated_via_adapter/1762652580.183267",
- "retrieved_timestamp": "1762652580.183268",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter",
- "developer": "grimjim",
- "inference_platform": "unknown",
- "id": "grimjim/Llama-3.1-8B-Instruct-abliterated_via_adapter"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48695018107510296
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.510526564708187
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13972809667673716
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40103125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3651097074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3.1-Bonsaikraft-8B-Instruct/5f15d683-bae4-4888-8d1c-352aac802fbe.json b/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3.1-Bonsaikraft-8B-Instruct/5f15d683-bae4-4888-8d1c-352aac802fbe.json
deleted file mode 100644
index e03ecc49732597803e63221a8de466748d58e475..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Llama-3.1-Bonsaikraft-8B-Instruct/5f15d683-bae4-4888-8d1c-352aac802fbe.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/grimjim_Llama-3.1-Bonsaikraft-8B-Instruct/1762652580.1834722",
- "retrieved_timestamp": "1762652580.1834729",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "grimjim/Llama-3.1-Bonsaikraft-8B-Instruct",
- "developer": "grimjim",
- "inference_platform": "unknown",
- "id": "grimjim/Llama-3.1-Bonsaikraft-8B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42500121898784116
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5286855891530357
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13141993957703926
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4235104166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3764128989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v2-12B/2cf86f7c-a9a8-48d0-bc10-e8a1f654092c.json b/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v2-12B/2cf86f7c-a9a8-48d0-bc10-e8a1f654092c.json
deleted file mode 100644
index dd39331e50af85e5e01eb6fd3663661451ab8d06..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v2-12B/2cf86f7c-a9a8-48d0-bc10-e8a1f654092c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v2-12B/1762652580.184318",
- "retrieved_timestamp": "1762652580.184319",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "grimjim/Magnolia-v2-12B",
- "developer": "grimjim",
- "inference_platform": "unknown",
- "id": "grimjim/Magnolia-v2-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3506119318962575
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5290279354217235
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12915407854984895
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3187919463087248
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41712499999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3601230053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v3-12B/68faa5a3-82ae-462d-adad-505134024710.json b/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v3-12B/68faa5a3-82ae-462d-adad-505134024710.json
deleted file mode 100644
index 77fb9812bd46c9969534376b51c4892b1caf7e3e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v3-12B/68faa5a3-82ae-462d-adad-505134024710.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v3-12B/1762652580.184813",
- "retrieved_timestamp": "1762652580.184814",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "grimjim/Magnolia-v3-12B",
- "developer": "grimjim",
- "inference_platform": "unknown",
- "id": "grimjim/Magnolia-v3-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39649906692021614
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5326669270363916
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1351963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32550335570469796
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4183958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3615359042553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v4-12B/a48116ed-d4bf-4f06-94aa-2ef8364bd8d2.json b/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v4-12B/a48116ed-d4bf-4f06-94aa-2ef8364bd8d2.json
deleted file mode 100644
index 002dfe6acff5d8930da3ad7272222c3f2b0f2168..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v4-12B/a48116ed-d4bf-4f06-94aa-2ef8364bd8d2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v4-12B/1762652580.18525",
- "retrieved_timestamp": "1762652580.185251",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "grimjim/Magnolia-v4-12B",
- "developer": "grimjim",
- "inference_platform": "unknown",
- "id": "grimjim/Magnolia-v4-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34179421712168156
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5430894084668724
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13141993957703926
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32802013422818793
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42112499999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3671875
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v5a-12B/ff64dcc7-9646-4c53-8b1e-68b62a025574.json b/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v5a-12B/ff64dcc7-9646-4c53-8b1e-68b62a025574.json
deleted file mode 100644
index 8b344aee1d50fe8861f560fd51603a29e687dd35..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/grimjim/grimjim_Magnolia-v5a-12B/ff64dcc7-9646-4c53-8b1e-68b62a025574.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/grimjim_Magnolia-v5a-12B/1762652580.185457",
- "retrieved_timestamp": "1762652580.185458",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "grimjim/Magnolia-v5a-12B",
- "developer": "grimjim",
- "inference_platform": "unknown",
- "id": "grimjim/Magnolia-v5a-12B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41136185321613317
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5311764105029141
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13746223564954682
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3221476510067114
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4144895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3601230053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/gupta-tanish/gupta-tanish_llama-7b-dpo-baseline/1b962cb9-8754-40ab-b41a-b7cdf1fa3de1.json b/leaderboard_data/HFOpenLLMv2/gupta-tanish/gupta-tanish_llama-7b-dpo-baseline/1b962cb9-8754-40ab-b41a-b7cdf1fa3de1.json
deleted file mode 100644
index 8151178883dcca321ed102de6b1ec63efc235da4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/gupta-tanish/gupta-tanish_llama-7b-dpo-baseline/1b962cb9-8754-40ab-b41a-b7cdf1fa3de1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/gupta-tanish_llama-7b-dpo-baseline/1762652580.1871748",
- "retrieved_timestamp": "1762652580.1871748",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "gupta-tanish/llama-7b-dpo-baseline",
- "developer": "gupta-tanish",
- "inference_platform": "unknown",
- "id": "gupta-tanish/llama-7b-dpo-baseline"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26930433472076315
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3896894398264714
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.019637462235649546
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2625838926174497
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.445625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20279255319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.738
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube-1.8b-chat/ac8f78b5-a9e1-4e17-a1e7-8a7b8dc22a8d.json b/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube-1.8b-chat/ac8f78b5-a9e1-4e17-a1e7-8a7b8dc22a8d.json
deleted file mode 100644
index 5a85215d5526e9f093fd802a40182a52ecfae19a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube-1.8b-chat/ac8f78b5-a9e1-4e17-a1e7-8a7b8dc22a8d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube-1.8b-chat/1762652580.188648",
- "retrieved_timestamp": "1762652580.188649",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "h2oai/h2o-danube-1.8b-chat",
- "developer": "h2oai",
- "inference_platform": "unknown",
- "id": "h2oai/h2o-danube-1.8b-chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2198699450790569
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3219657593234448
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.013595166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25419463087248323
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3988645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13139960106382978
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 1.831
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3-4b-base/3878bb0d-753f-465a-a8c1-8408f8f5bfcf.json b/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3-4b-base/3878bb0d-753f-465a-a8c1-8408f8f5bfcf.json
deleted file mode 100644
index 7ceee720fabbe8bcda7c2504cabca26793248933..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3-4b-base/3878bb0d-753f-465a-a8c1-8408f8f5bfcf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3-4b-base/1762652580.18891",
- "retrieved_timestamp": "1762652580.1889112",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "h2oai/h2o-danube3-4b-base",
- "developer": "h2oai",
- "inference_platform": "unknown",
- "id": "h2oai/h2o-danube3-4b-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23380851695722904
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3599083951265592
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.022658610271903322
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37781250000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2109375
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.962
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3-4b-chat/d3df3cb7-5e79-49e5-9ed1-1e2771318915.json b/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3-4b-chat/d3df3cb7-5e79-49e5-9ed1-1e2771318915.json
deleted file mode 100644
index 9f9d53492c9cde7d5efa9e1ccb5cb14d9c95f85b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3-4b-chat/d3df3cb7-5e79-49e5-9ed1-1e2771318915.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3-4b-chat/1762652580.1891232",
- "retrieved_timestamp": "1762652580.189124",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "h2oai/h2o-danube3-4b-chat",
- "developer": "h2oai",
- "inference_platform": "unknown",
- "id": "h2oai/h2o-danube3-4b-chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3628771659197596
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3466170643135169
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04078549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2600671140939597
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.378125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22282247340425532
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.962
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3-500m-chat/c917765b-a4b4-4e5d-9c11-eed791349daf.json b/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3-500m-chat/c917765b-a4b4-4e5d-9c11-eed791349daf.json
deleted file mode 100644
index 71977428b9b226b9b96964fe153c3fd795c878a7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3-500m-chat/c917765b-a4b4-4e5d-9c11-eed791349daf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3-500m-chat/1762652580.1893299",
- "retrieved_timestamp": "1762652580.1893299",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "h2oai/h2o-danube3-500m-chat",
- "developer": "h2oai",
- "inference_platform": "unknown",
- "id": "h2oai/h2o-danube3-500m-chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2207941594968018
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3034691168308313
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.01661631419939577
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23070469798657717
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34339583333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11436170212765957
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 0.514
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3.1-4b-chat/5f5d83bd-91e9-416b-b40d-506f3861ed3f.json b/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3.1-4b-chat/5f5d83bd-91e9-416b-b40d-506f3861ed3f.json
deleted file mode 100644
index ac268c5f79994ba39b80232dfeacfb75c17bb62a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/h2oai/h2oai_h2o-danube3.1-4b-chat/5f5d83bd-91e9-416b-b40d-506f3861ed3f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/h2oai_h2o-danube3.1-4b-chat/1762652580.189556",
- "retrieved_timestamp": "1762652580.189557",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "h2oai/h2o-danube3.1-4b-chat",
- "developer": "h2oai",
- "inference_platform": "unknown",
- "id": "h2oai/h2o-danube3.1-4b-chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5021121734774842
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3608421638178268
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03323262839879154
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41015625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2718583776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.962
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/haoranxu/haoranxu_ALMA-13B-R/9446f216-e3d6-4fca-ae00-937b4a76e5bf.json b/leaderboard_data/HFOpenLLMv2/haoranxu/haoranxu_ALMA-13B-R/9446f216-e3d6-4fca-ae00-937b4a76e5bf.json
deleted file mode 100644
index 14c091ead753a8e219214287b8259e5656757855..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/haoranxu/haoranxu_ALMA-13B-R/9446f216-e3d6-4fca-ae00-937b4a76e5bf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/haoranxu_ALMA-13B-R/1762652580.189782",
- "retrieved_timestamp": "1762652580.189783",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "haoranxu/ALMA-13B-R",
- "developer": "haoranxu",
- "inference_platform": "unknown",
- "id": "haoranxu/ALMA-13B-R"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.003921816336210145
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.345656261205981
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.017371601208459216
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2575503355704698
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35279166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18168218085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "?",
- "params_billions": 13.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/haoranxu/haoranxu_Llama-3-Instruct-8B-CPO-SimPO/aa67ad0b-e469-4b49-a797-4542370a2e94.json b/leaderboard_data/HFOpenLLMv2/haoranxu/haoranxu_Llama-3-Instruct-8B-CPO-SimPO/aa67ad0b-e469-4b49-a797-4542370a2e94.json
deleted file mode 100644
index c837d275dd90fd5c47a2869c5a46aecf7074ac78..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/haoranxu/haoranxu_Llama-3-Instruct-8B-CPO-SimPO/aa67ad0b-e469-4b49-a797-4542370a2e94.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/haoranxu_Llama-3-Instruct-8B-CPO-SimPO/1762652580.190052",
- "retrieved_timestamp": "1762652580.190052",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "haoranxu/Llama-3-Instruct-8B-CPO-SimPO",
- "developer": "haoranxu",
- "inference_platform": "unknown",
- "id": "haoranxu/Llama-3-Instruct-8B-CPO-SimPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7046447869430887
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5048301774821616
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1027190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3566666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3686003989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/haoranxu/haoranxu_Llama-3-Instruct-8B-SimPO/39aa4e41-376f-4ee6-8925-8bf746a871a0.json b/leaderboard_data/HFOpenLLMv2/haoranxu/haoranxu_Llama-3-Instruct-8B-SimPO/39aa4e41-376f-4ee6-8925-8bf746a871a0.json
deleted file mode 100644
index d2caac9c630d7c9d878aaa36e576dead451c5e18..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/haoranxu/haoranxu_Llama-3-Instruct-8B-SimPO/39aa4e41-376f-4ee6-8925-8bf746a871a0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/haoranxu_Llama-3-Instruct-8B-SimPO/1762652580.190277",
- "retrieved_timestamp": "1762652580.1902778",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "haoranxu/Llama-3-Instruct-8B-SimPO",
- "developer": "haoranxu",
- "inference_platform": "unknown",
- "id": "haoranxu/Llama-3-Instruct-8B-SimPO"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7347449212533854
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49792360151415016
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08761329305135952
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35660416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37333776595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/hatemmahmoud/hatemmahmoud_qwen2.5-1.5b-sft-raft-grpo-hra-doc/7d3c185f-4b4f-4bdd-bac9-f4ba2410f40c.json b/leaderboard_data/HFOpenLLMv2/hatemmahmoud/hatemmahmoud_qwen2.5-1.5b-sft-raft-grpo-hra-doc/7d3c185f-4b4f-4bdd-bac9-f4ba2410f40c.json
deleted file mode 100644
index 8d3d3a6e3e8519f99a9f08ed969cdbd3b45e6ece..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/hatemmahmoud/hatemmahmoud_qwen2.5-1.5b-sft-raft-grpo-hra-doc/7d3c185f-4b4f-4bdd-bac9-f4ba2410f40c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hatemmahmoud_qwen2.5-1.5b-sft-raft-grpo-hra-doc/1762652580.190489",
- "retrieved_timestamp": "1762652580.190489",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc",
- "developer": "hatemmahmoud",
- "inference_platform": "unknown",
- "id": "hatemmahmoud/qwen2.5-1.5b-sft-raft-grpo-hra-doc"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41958004760701606
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4269926809768501
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2175226586102719
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2676174496644295
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36097916666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.277593085106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.544
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/hon9kon9ize/hon9kon9ize_CantoneseLLMChat-v0.5/6e87be06-ca0e-48a4-ae28-4a5794600117.json b/leaderboard_data/HFOpenLLMv2/hon9kon9ize/hon9kon9ize_CantoneseLLMChat-v0.5/6e87be06-ca0e-48a4-ae28-4a5794600117.json
deleted file mode 100644
index 1f28be60e66417ad946ad0f82f25923a7147fc21..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/hon9kon9ize/hon9kon9ize_CantoneseLLMChat-v0.5/6e87be06-ca0e-48a4-ae28-4a5794600117.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hon9kon9ize_CantoneseLLMChat-v0.5/1762652580.190754",
- "retrieved_timestamp": "1762652580.1907551",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hon9kon9ize/CantoneseLLMChat-v0.5",
- "developer": "hon9kon9ize",
- "inference_platform": "unknown",
- "id": "hon9kon9ize/CantoneseLLMChat-v0.5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3230849701015528
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43452388803059244
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04154078549848943
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4706458333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2504155585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.069
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/hon9kon9ize/hon9kon9ize_CantoneseLLMChat-v1.0-7B/cccf983e-e1b8-4f0f-b147-abccdea65548.json b/leaderboard_data/HFOpenLLMv2/hon9kon9ize/hon9kon9ize_CantoneseLLMChat-v1.0-7B/cccf983e-e1b8-4f0f-b147-abccdea65548.json
deleted file mode 100644
index f24222406d570e42ee85090af6410a88f8d69c0c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/hon9kon9ize/hon9kon9ize_CantoneseLLMChat-v1.0-7B/cccf983e-e1b8-4f0f-b147-abccdea65548.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hon9kon9ize_CantoneseLLMChat-v1.0-7B/1762652580.191013",
- "retrieved_timestamp": "1762652580.191013",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hon9kon9ize/CantoneseLLMChat-v1.0-7B",
- "developer": "hon9kon9ize",
- "inference_platform": "unknown",
- "id": "hon9kon9ize/CantoneseLLMChat-v1.0-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44548353923146145
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4865734655539633
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2107250755287009
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3221476510067114
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3882916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3784906914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/hongbai12/hongbai12_li-0.4-pre/ab7dcb4c-3884-428f-b342-38034dd51b56.json b/leaderboard_data/HFOpenLLMv2/hongbai12/hongbai12_li-0.4-pre/ab7dcb4c-3884-428f-b342-38034dd51b56.json
deleted file mode 100644
index 041421a78dcede8cebcb60562cc80a69c7e3bfd6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/hongbai12/hongbai12_li-0.4-pre/ab7dcb4c-3884-428f-b342-38034dd51b56.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hongbai12_li-0.4-pre/1762652580.191224",
- "retrieved_timestamp": "1762652580.191225",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hongbai12/li-0.4-pre",
- "developer": "hongbai12",
- "inference_platform": "unknown",
- "id": "hongbai12/li-0.4-pre"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5199725616918665
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6298274927108823
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49244712990936557
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32298657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4513020833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5014960106382979
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_Falcon3Slerp1-10B/376d342c-669b-4c76-9e7b-d49566ac441d.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_Falcon3Slerp1-10B/376d342c-669b-4c76-9e7b-d49566ac441d.json
deleted file mode 100644
index 4fb45f91cb84040ea9ed47e588d426a9387c37c6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_Falcon3Slerp1-10B/376d342c-669b-4c76-9e7b-d49566ac441d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_Falcon3Slerp1-10B/1762652580.19171",
- "retrieved_timestamp": "1762652580.191711",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/Falcon3Slerp1-10B",
- "developer": "hotmailuser",
- "inference_platform": "unknown",
- "id": "hotmailuser/Falcon3Slerp1-10B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5694069513335727
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.616984966186231
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2598187311178248
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34395973154362414
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43176041666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4401595744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.306
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_Falcon3Slerp2-10B/bae0b772-8ae6-4fed-ae78-d6d83e560a95.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_Falcon3Slerp2-10B/bae0b772-8ae6-4fed-ae78-d6d83e560a95.json
deleted file mode 100644
index 0e843e80c447a4e57d7582922ade2ae8e1fe987e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_Falcon3Slerp2-10B/bae0b772-8ae6-4fed-ae78-d6d83e560a95.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_Falcon3Slerp2-10B/1762652580.191951",
- "retrieved_timestamp": "1762652580.191952",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/Falcon3Slerp2-10B",
- "developer": "hotmailuser",
- "inference_platform": "unknown",
- "id": "hotmailuser/Falcon3Slerp2-10B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6117966994241945
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6164263500746402
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23187311178247735
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33808724832214765
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4095625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4369182180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.306
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_Falcon3Slerp4-10B/d5466af4-2bef-4ce8-a659-9e05a5e674b6.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_Falcon3Slerp4-10B/d5466af4-2bef-4ce8-a659-9e05a5e674b6.json
deleted file mode 100644
index fd6f6c77c944772a38b64a5de6246db3b70d3284..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_Falcon3Slerp4-10B/d5466af4-2bef-4ce8-a659-9e05a5e674b6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_Falcon3Slerp4-10B/1762652580.19215",
- "retrieved_timestamp": "1762652580.192151",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/Falcon3Slerp4-10B",
- "developer": "hotmailuser",
- "inference_platform": "unknown",
- "id": "hotmailuser/Falcon3Slerp4-10B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6072254950198805
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.611433776236228
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22885196374622357
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3288590604026846
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40175
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4387466755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.306
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp-3B/2db7aa3c-4969-40c0-b8c6-1ff5c953ba23.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp-3B/2db7aa3c-4969-40c0-b8c6-1ff5c953ba23.json
deleted file mode 100644
index cdc181d82e85b0160ee3b0d748adc68e0239b388..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp-3B/2db7aa3c-4969-40c0-b8c6-1ff5c953ba23.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp-3B/1762652580.19236",
- "retrieved_timestamp": "1762652580.1923609",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/FalconSlerp-3B",
- "developer": "hotmailuser",
- "inference_platform": "unknown",
- "id": "hotmailuser/FalconSlerp-3B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5694568190179834
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46239111387485293
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17598187311178248
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3989270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29679188829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 3.228
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp1-7B/5d01fa6d-4280-4926-b166-e98892ee60f4.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp1-7B/5d01fa6d-4280-4926-b166-e98892ee60f4.json
deleted file mode 100644
index 29d7c94608e762eacab8d110d759ec024100b891..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp1-7B/5d01fa6d-4280-4926-b166-e98892ee60f4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp1-7B/1762652580.1925812",
- "retrieved_timestamp": "1762652580.192582",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/FalconSlerp1-7B",
- "developer": "hotmailuser",
- "inference_platform": "unknown",
- "id": "hotmailuser/FalconSlerp1-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5394564200765082
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5354677787663963
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23791540785498488
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44525
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4128989361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 7.456
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp2-7B/fc8605ad-f7b9-4a73-afd3-85b996fc2549.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp2-7B/fc8605ad-f7b9-4a73-afd3-85b996fc2549.json
deleted file mode 100644
index 41eff7414f97c87bb751bd1f96893694e97d658f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp2-7B/fc8605ad-f7b9-4a73-afd3-85b996fc2549.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp2-7B/1762652580.1928341",
- "retrieved_timestamp": "1762652580.192835",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/FalconSlerp2-7B",
- "developer": "hotmailuser",
- "inference_platform": "unknown",
- "id": "hotmailuser/FalconSlerp2-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6160432097944565
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5537805428914538
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2983383685800604
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3196308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44788541666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4140625
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 7.456
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp3-10B/f933fbc2-370e-4231-94a9-c833c2aa793d.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp3-10B/f933fbc2-370e-4231-94a9-c833c2aa793d.json
deleted file mode 100644
index beaf60bd2b586991086d222baddad8598b5840a2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp3-10B/f933fbc2-370e-4231-94a9-c833c2aa793d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp3-10B/1762652580.1930392",
- "retrieved_timestamp": "1762652580.19304",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/FalconSlerp3-10B",
- "developer": "hotmailuser",
- "inference_platform": "unknown",
- "id": "hotmailuser/FalconSlerp3-10B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6001564737119731
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6060288025434474
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22734138972809667
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33557046979865773
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4030833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4323470744680851
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.306
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp3-7B/017a681e-1bbb-4890-bfcc-f276954678e1.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp3-7B/017a681e-1bbb-4890-bfcc-f276954678e1.json
deleted file mode 100644
index 7f506426dce3944c317177b6d775b14fa2571528..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp3-7B/017a681e-1bbb-4890-bfcc-f276954678e1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp3-7B/1762652580.193249",
- "retrieved_timestamp": "1762652580.19325",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/FalconSlerp3-7B",
- "developer": "hotmailuser",
- "inference_platform": "unknown",
- "id": "hotmailuser/FalconSlerp3-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6096235765546527
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5532966528909408
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3157099697885196
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3187919463087248
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45067708333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41273271276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 7.456
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp4-7B/d6ac7c9f-212e-4000-b89e-d977122d2e2b.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp4-7B/d6ac7c9f-212e-4000-b89e-d977122d2e2b.json
deleted file mode 100644
index aeab40c12ea7f3083c3d3016a242a2dd26fbc96d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp4-7B/d6ac7c9f-212e-4000-b89e-d977122d2e2b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp4-7B/1762652580.193457",
- "retrieved_timestamp": "1762652580.1934578",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/FalconSlerp4-7B",
- "developer": "hotmailuser",
- "inference_platform": "unknown",
- "id": "hotmailuser/FalconSlerp4-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6284580468711907
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5523506352993854
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2212990936555891
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33221476510067116
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4585208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4031748670212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 7.456
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp6-7B/88a4587f-d3d4-4b08-b800-13a2daf4a660.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp6-7B/88a4587f-d3d4-4b08-b800-13a2daf4a660.json
deleted file mode 100644
index ae93e5f22086277e344417f591956ff6b8776c9e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_FalconSlerp6-7B/88a4587f-d3d4-4b08-b800-13a2daf4a660.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_FalconSlerp6-7B/1762652580.193665",
- "retrieved_timestamp": "1762652580.193666",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/FalconSlerp6-7B",
- "developer": "hotmailuser",
- "inference_platform": "unknown",
- "id": "hotmailuser/FalconSlerp6-7B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6026542906155667
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5383801786207648
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20468277945619334
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179530201342282
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44921875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39951795212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 7.456
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_RombosBeagle-v2beta-MGS-32B/c507c0ac-759a-4013-8dd0-7ab5a959ca65.json b/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_RombosBeagle-v2beta-MGS-32B/c507c0ac-759a-4013-8dd0-7ab5a959ca65.json
deleted file mode 100644
index 8c92345067d4c51b90d3effc34f7d2707df07069..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/hotmailuser/hotmailuser_RombosBeagle-v2beta-MGS-32B/c507c0ac-759a-4013-8dd0-7ab5a959ca65.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/hotmailuser_RombosBeagle-v2beta-MGS-32B/1762652580.199307",
- "retrieved_timestamp": "1762652580.199308",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "hotmailuser/RombosBeagle-v2beta-MGS-32B",
- "developer": "hotmailuser",
- "inference_platform": "unknown",
- "id": "hotmailuser/RombosBeagle-v2beta-MGS-32B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5156761836371937
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7037350002757341
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49924471299093653
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3800335570469799
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5020833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5907579787234043
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_QwQ-32B-Coder-Fusion-7030/5fb3b31d-8c2c-4d76-8532-1bff0f793f4b.json b/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_QwQ-32B-Coder-Fusion-7030/5fb3b31d-8c2c-4d76-8532-1bff0f793f4b.json
deleted file mode 100644
index 247302ea67ca857b30c25ffa6c09fbe0fb8bc504..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_QwQ-32B-Coder-Fusion-7030/5fb3b31d-8c2c-4d76-8532-1bff0f793f4b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/huihui-ai_QwQ-32B-Coder-Fusion-7030/1762652580.2006452",
- "retrieved_timestamp": "1762652580.200646",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "huihui-ai/QwQ-32B-Coder-Fusion-7030",
- "developer": "huihui-ai",
- "inference_platform": "unknown",
- "id": "huihui-ai/QwQ-32B-Coder-Fusion-7030"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38650779930584184
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6177864730931621
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2794561933534743
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39222916666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4367519946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_QwQ-32B-Coder-Fusion-8020/461ee093-b573-4ce9-9168-c9852dc9745b.json b/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_QwQ-32B-Coder-Fusion-8020/461ee093-b573-4ce9-9168-c9852dc9745b.json
deleted file mode 100644
index 7849a5bed152924dfb177a963267a435054bcb83..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_QwQ-32B-Coder-Fusion-8020/461ee093-b573-4ce9-9168-c9852dc9745b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/huihui-ai_QwQ-32B-Coder-Fusion-8020/1762652580.200916",
- "retrieved_timestamp": "1762652580.200917",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "huihui-ai/QwQ-32B-Coder-Fusion-8020",
- "developer": "huihui-ai",
- "inference_platform": "unknown",
- "id": "huihui-ai/QwQ-32B-Coder-Fusion-8020"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6020547702318737
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6664531829718748
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.459214501510574
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3548657718120805
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42934374999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5367353723404256
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_QwQ-32B-Coder-Fusion-9010/41d5fb44-855b-4ff1-8f5d-95b8a9f9a9af.json b/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_QwQ-32B-Coder-Fusion-9010/41d5fb44-855b-4ff1-8f5d-95b8a9f9a9af.json
deleted file mode 100644
index 4e2cd9c3642dcb2905d336e63bd5bb0c2d7802af..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_QwQ-32B-Coder-Fusion-9010/41d5fb44-855b-4ff1-8f5d-95b8a9f9a9af.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/huihui-ai_QwQ-32B-Coder-Fusion-9010/1762652580.201131",
- "retrieved_timestamp": "1762652580.201132",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "huihui-ai/QwQ-32B-Coder-Fusion-9010",
- "developer": "huihui-ai",
- "inference_platform": "unknown",
- "id": "huihui-ai/QwQ-32B-Coder-Fusion-9010"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5778246164620984
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6727405551499568
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5317220543806647
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3615771812080537
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4681979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5600066489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 32.764
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-14B-Instruct-abliterated-v2/92cad41b-64b5-48db-b865-77d0ea2ef834.json b/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-14B-Instruct-abliterated-v2/92cad41b-64b5-48db-b865-77d0ea2ef834.json
deleted file mode 100644
index a4199f2decdbd41f91f10969efa338d1942c98fa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-14B-Instruct-abliterated-v2/92cad41b-64b5-48db-b865-77d0ea2ef834.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-14B-Instruct-abliterated-v2/1762652580.201351",
- "retrieved_timestamp": "1762652580.201352",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2",
- "developer": "huihui-ai",
- "inference_platform": "unknown",
- "id": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8327637335602867
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6323822447052897
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5302114803625377
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3338926174496644
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42196875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49617686170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 14.77
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-72B-Instruct-abliterated/b892c2f3-4aa6-4b19-80e5-1b0f5e0eda25.json b/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-72B-Instruct-abliterated/b892c2f3-4aa6-4b19-80e5-1b0f5e0eda25.json
deleted file mode 100644
index c092ca338c8d0352ef7548e985fb9086006036e3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-72B-Instruct-abliterated/b892c2f3-4aa6-4b19-80e5-1b0f5e0eda25.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-72B-Instruct-abliterated/1762652580.2015731",
- "retrieved_timestamp": "1762652580.2015731",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "huihui-ai/Qwen2.5-72B-Instruct-abliterated",
- "developer": "huihui-ai",
- "inference_platform": "unknown",
- "id": "huihui-ai/Qwen2.5-72B-Instruct-abliterated"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.8592667455684251
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7189881596250237
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6012084592145015
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3951342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4232708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5536901595744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 72.706
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-7B-Instruct-abliterated-v2/15c4b42b-ee8f-4f0d-8d54-7d827133fe7f.json b/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-7B-Instruct-abliterated-v2/15c4b42b-ee8f-4f0d-8d54-7d827133fe7f.json
deleted file mode 100644
index 1a268fe24f67ab658f52ecb2d52aabd624e711ad..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-7B-Instruct-abliterated-v2/15c4b42b-ee8f-4f0d-8d54-7d827133fe7f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-7B-Instruct-abliterated-v2/1762652580.201998",
- "retrieved_timestamp": "1762652580.201998",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2",
- "developer": "huihui-ai",
- "inference_platform": "unknown",
- "id": "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7606484128778308
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5376688442794247
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4637462235649547
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3980625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42079454787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-7B-Instruct-abliterated/625501d4-5d1e-48e0-8690-e301c51f652d.json b/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-7B-Instruct-abliterated/625501d4-5d1e-48e0-8690-e301c51f652d.json
deleted file mode 100644
index 4d27c50fea579158be012af15615a04a224ce059..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/huihui-ai/huihui-ai_Qwen2.5-7B-Instruct-abliterated/625501d4-5d1e-48e0-8690-e301c51f652d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/huihui-ai_Qwen2.5-7B-Instruct-abliterated/1762652580.201783",
- "retrieved_timestamp": "1762652580.2017841",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "huihui-ai/Qwen2.5-7B-Instruct-abliterated",
- "developer": "huihui-ai",
- "inference_platform": "unknown",
- "id": "huihui-ai/Qwen2.5-7B-Instruct-abliterated"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7546033413564897
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5261589972829911
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45770392749244715
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31543624161073824
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39666666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41796875
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/huu-ontocord/huu-ontocord_wide_3b_orpo_stage1.1-ss1-orpo3/50854a36-b87e-421d-b8d5-7a46054ecc59.json b/leaderboard_data/HFOpenLLMv2/huu-ontocord/huu-ontocord_wide_3b_orpo_stage1.1-ss1-orpo3/50854a36-b87e-421d-b8d5-7a46054ecc59.json
deleted file mode 100644
index b87f1aa3e036c05066a6e5d6db030d2d33f4bf6d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/huu-ontocord/huu-ontocord_wide_3b_orpo_stage1.1-ss1-orpo3/50854a36-b87e-421d-b8d5-7a46054ecc59.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/huu-ontocord_wide_3b_orpo_stage1.1-ss1-orpo3/1762652580.202209",
- "retrieved_timestamp": "1762652580.20221",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3",
- "developer": "huu-ontocord",
- "inference_platform": "unknown",
- "id": "huu-ontocord/wide_3b_orpo_stage1.1-ss1-orpo3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15052726764983576
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936618285636837
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.009818731117824773
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25838926174496646
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36178125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11643949468085106
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 3.759
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/iRyanBell/iRyanBell_ARC1-II/19afc23f-5849-4147-b240-9bb7ddea4d58.json b/leaderboard_data/HFOpenLLMv2/iRyanBell/iRyanBell_ARC1-II/19afc23f-5849-4147-b240-9bb7ddea4d58.json
deleted file mode 100644
index a19cbb2608ca9c2867549f13529eacaf074cef20..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/iRyanBell/iRyanBell_ARC1-II/19afc23f-5849-4147-b240-9bb7ddea4d58.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/iRyanBell_ARC1-II/1762652580.204559",
- "retrieved_timestamp": "1762652580.204561",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "iRyanBell/ARC1-II",
- "developer": "iRyanBell",
- "inference_platform": "unknown",
- "id": "iRyanBell/ARC1-II"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17083560508340093
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33817781029884353
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02190332326283988
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4912916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1685505319148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/iRyanBell/iRyanBell_ARC1/62f9b47d-2860-44b3-8abb-3d441f4bdeb4.json b/leaderboard_data/HFOpenLLMv2/iRyanBell/iRyanBell_ARC1/62f9b47d-2860-44b3-8abb-3d441f4bdeb4.json
deleted file mode 100644
index 891c3fc0ada17a30cc8da595299b7c30fa939a59..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/iRyanBell/iRyanBell_ARC1/62f9b47d-2860-44b3-8abb-3d441f4bdeb4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/iRyanBell_ARC1/1762652580.204204",
- "retrieved_timestamp": "1762652580.204204",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "iRyanBell/ARC1",
- "developer": "iRyanBell",
- "inference_platform": "unknown",
- "id": "iRyanBell/ARC1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.441112913735555
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4902999658144703
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06873111782477341
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3990520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3371010638297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibivibiv/ibivibiv_colossus_120b/f0bcf710-b1a8-4736-9fd3-6b0ea241155e.json b/leaderboard_data/HFOpenLLMv2/ibivibiv/ibivibiv_colossus_120b/f0bcf710-b1a8-4736-9fd3-6b0ea241155e.json
deleted file mode 100644
index 3e8c4d80bdce9b6650b38ea62afa6ff2a92abbef..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibivibiv/ibivibiv_colossus_120b/f0bcf710-b1a8-4736-9fd3-6b0ea241155e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibivibiv_colossus_120b/1762652580.2048829",
- "retrieved_timestamp": "1762652580.204884",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibivibiv/colossus_120b",
- "developer": "ibivibiv",
- "inference_platform": "unknown",
- "id": "ibivibiv/colossus_120b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42759877126025614
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6061408586494191
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05664652567975831
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4733125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3961103723404255
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 117.749
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibivibiv/ibivibiv_multimaster-7b-v6/7044a4d4-1c07-40ef-917c-d242b61d7877.json b/leaderboard_data/HFOpenLLMv2/ibivibiv/ibivibiv_multimaster-7b-v6/7044a4d4-1c07-40ef-917c-d242b61d7877.json
deleted file mode 100644
index 3db4609239c15b784758708f18244e1624c57211..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibivibiv/ibivibiv_multimaster-7b-v6/7044a4d4-1c07-40ef-917c-d242b61d7877.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibivibiv_multimaster-7b-v6/1762652580.205187",
- "retrieved_timestamp": "1762652580.205188",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibivibiv/multimaster-7b-v6",
- "developer": "ibivibiv",
- "inference_platform": "unknown",
- "id": "ibivibiv/multimaster-7b-v6"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4473075883101283
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.519351871026721
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.055891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43957291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30950797872340424
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MixtralForCausalLM",
- "params_billions": 35.428
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-1b-a400m-base/52e253ba-0291-4e78-b292-806cabe74697.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-1b-a400m-base/52e253ba-0291-4e78-b292-806cabe74697.json
deleted file mode 100644
index 89c7caafdd636f2cff1ae829ac4500900ea1fc0a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-1b-a400m-base/52e253ba-0291-4e78-b292-806cabe74697.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-1b-a400m-base/1762652580.205958",
- "retrieved_timestamp": "1762652580.20596",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-3.0-1b-a400m-base",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-3.0-1b-a400m-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24040324117785256
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3221205531032148
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.026435045317220542
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24748322147651006
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3367291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11519281914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GraniteForCausalLM",
- "params_billions": 1.335
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-1b-a400m-instruct/afc49838-c7fc-40ed-841f-74b0bc3dd36e.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-1b-a400m-instruct/afc49838-c7fc-40ed-841f-74b0bc3dd36e.json
deleted file mode 100644
index c956d5079e9084909295cb6e65031f88704d28c1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-1b-a400m-instruct/afc49838-c7fc-40ed-841f-74b0bc3dd36e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-1b-a400m-instruct/1762652580.206321",
- "retrieved_timestamp": "1762652580.206322",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-3.0-1b-a400m-instruct",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-3.0-1b-a400m-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33315159332792543
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3223950988485842
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.027945619335347432
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2609060402684564
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36228124999999994
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12441821808510638
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GraniteForCausalLM",
- "params_billions": 1.335
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-2b-base/184f8ef6-7cb7-45f2-b983-70dc4503a968.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-2b-base/184f8ef6-7cb7-45f2-b983-70dc4503a968.json
deleted file mode 100644
index e299c3e5a57c6c76c58102b78e6ccbccbf3520e4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-2b-base/184f8ef6-7cb7-45f2-b983-70dc4503a968.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-2b-base/1762652580.206552",
- "retrieved_timestamp": "1762652580.206552",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-3.0-2b-base",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-3.0-2b-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3873821460391761
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40474805593806223
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.054380664652567974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28020134228187926
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3434270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23811502659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GraniteForCausalLM",
- "params_billions": 2.634
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-2b-instruct/ec853cc1-7c48-4334-9ff6-d9669750570b.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-2b-instruct/ec853cc1-7c48-4334-9ff6-d9669750570b.json
deleted file mode 100644
index f557c6b8a8f315e40d551a5619d2a5fce2f695c9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-2b-instruct/ec853cc1-7c48-4334-9ff6-d9669750570b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-2b-instruct/1762652580.206777",
- "retrieved_timestamp": "1762652580.206777",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-3.0-2b-instruct",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-3.0-2b-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.513977357854936
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44119772062630297
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09214501510574018
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35148958333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2814162234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GraniteForCausalLM",
- "params_billions": 2.634
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-3b-a800m-base/f917bdff-4be5-440b-8e62-bb9f7b0dd0f5.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-3b-a800m-base/f917bdff-4be5-440b-8e62-bb9f7b0dd0f5.json
deleted file mode 100644
index 7b7e69638718aed3058b404af48b2acf57cb6b91..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-3b-a800m-base/f917bdff-4be5-440b-8e62-bb9f7b0dd0f5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-3b-a800m-base/1762652580.20698",
- "retrieved_timestamp": "1762652580.20698",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-3.0-3b-a800m-base",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-3.0-3b-a800m-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2732261510569733
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36674974971308566
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04833836858006042
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2516778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34196875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18907912234042554
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GraniteForCausalLM",
- "params_billions": 3.374
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-3b-a800m-instruct/7c92caf5-df83-4c8e-ab85-f99c7ac43f63.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-3b-a800m-instruct/7c92caf5-df83-4c8e-ab85-f99c7ac43f63.json
deleted file mode 100644
index baf2237013cf87d7a08bf71d2dd0b5912a8660eb..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-3b-a800m-instruct/7c92caf5-df83-4c8e-ab85-f99c7ac43f63.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-3b-a800m-instruct/1762652580.2071838",
- "retrieved_timestamp": "1762652580.2071848",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-3.0-3b-a800m-instruct",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-3.0-3b-a800m-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4298217618142085
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37527805291733446
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0702416918429003
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28104026845637586
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3486666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21517619680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GraniteForCausalLM",
- "params_billions": 3.374
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-8b-base/b7b71327-323b-4b7c-92a1-426911bed479.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-8b-base/b7b71327-323b-4b7c-92a1-426911bed479.json
deleted file mode 100644
index a7bfa635de7cc08af302f2d322edfb6dff96cae1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-8b-base/b7b71327-323b-4b7c-92a1-426911bed479.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-8b-base/1762652580.207386",
- "retrieved_timestamp": "1762652580.207386",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-3.0-8b-base",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-3.0-8b-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4583482936386566
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4943760637365333
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10120845921450151
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32550335570469796
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40813541666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3312832446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GraniteForCausalLM",
- "params_billions": 8.171
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-8b-instruct/d4dc4d78-33a3-428c-9490-382dd0c19c08.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-8b-instruct/d4dc4d78-33a3-428c-9490-382dd0c19c08.json
deleted file mode 100644
index bb0842ef7c7449595f9dc3650d713c940f8ab0d5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.0-8b-instruct/d4dc4d78-33a3-428c-9490-382dd0c19c08.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.0-8b-instruct/1762652580.207594",
- "retrieved_timestamp": "1762652580.207595",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-3.0-8b-instruct",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-3.0-8b-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5309633993359841
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5191874631840226
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1419939577039275
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33221476510067116
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3900625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34566156914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GraniteForCausalLM",
- "params_billions": 8.171
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-1b-a400m-base/17192714-a653-428d-a7c7-06dd41db77fa.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-1b-a400m-base/17192714-a653-428d-a7c7-06dd41db77fa.json
deleted file mode 100644
index cebdea0f81c55beb811966a56c30d88d4f08eda2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-1b-a400m-base/17192714-a653-428d-a7c7-06dd41db77fa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-1b-a400m-base/1762652580.207968",
- "retrieved_timestamp": "1762652580.2079701",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-3.1-1b-a400m-base",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-3.1-1b-a400m-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2519437315212525
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3298699546506724
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.027190332326283987
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2516778523489933
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3500625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11394614361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GraniteMoeForCausalLM",
- "params_billions": 1.335
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-1b-a400m-instruct/8167695b-db96-4687-91b8-0af55e67a606.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-1b-a400m-instruct/8167695b-db96-4687-91b8-0af55e67a606.json
deleted file mode 100644
index 77a79c6d61f5b531206bbaacf5b0ac72115ad9c1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-1b-a400m-instruct/8167695b-db96-4687-91b8-0af55e67a606.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-1b-a400m-instruct/1762652580.208256",
- "retrieved_timestamp": "1762652580.208257",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-3.1-1b-a400m-instruct",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-3.1-1b-a400m-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46863987553025976
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3279834385375178
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.045317220543806644
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23993288590604026
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33025
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12167553191489362
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GraniteMoeForCausalLM",
- "params_billions": 1.335
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-2b-base/971e6eba-61ff-42e6-9740-1895080ff94f.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-2b-base/971e6eba-61ff-42e6-9740-1895080ff94f.json
deleted file mode 100644
index 4fc5f0a9a62e503732f991a3cbf8657b748362a5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-2b-base/971e6eba-61ff-42e6-9740-1895080ff94f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-2b-base/1762652580.208491",
- "retrieved_timestamp": "1762652580.208492",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-3.1-2b-base",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-3.1-2b-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35216115462528313
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4047188028918873
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05664652567975831
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3485729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22506648936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GraniteForCausalLM",
- "params_billions": 2.534
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-2b-instruct/fcdf14a1-900f-4856-aac6-8ed47910f882.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-2b-instruct/fcdf14a1-900f-4856-aac6-8ed47910f882.json
deleted file mode 100644
index 9f4cd39fb01ddee0bac74a36f66e551176b2dbaa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-2b-instruct/fcdf14a1-900f-4856-aac6-8ed47910f882.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-2b-instruct/1762652580.2087219",
- "retrieved_timestamp": "1762652580.2087228",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-3.1-2b-instruct",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-3.1-2b-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.628557782240012
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44089858558056544
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15256797583081572
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3605416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28191489361702127
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GraniteForCausalLM",
- "params_billions": 2.534
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-3b-a800m-base/8930e3f9-e0b8-4fb7-91e2-ee34b17cf1eb.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-3b-a800m-base/8930e3f9-e0b8-4fb7-91e2-ee34b17cf1eb.json
deleted file mode 100644
index 692dc704fa5fa2c915b0c5cd47f90084e6766420..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-3b-a800m-base/8930e3f9-e0b8-4fb7-91e2-ee34b17cf1eb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-3b-a800m-base/1762652580.20895",
- "retrieved_timestamp": "1762652580.208951",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-3.1-3b-a800m-base",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-3.1-3b-a800m-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2996294276962903
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.362822992347764
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.045317220543806644
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3275208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1792719414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GraniteMoeForCausalLM",
- "params_billions": 3.299
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-3b-a800m-instruct/1e0c27fc-8111-4325-8e61-c24c2f8124f7.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-3b-a800m-instruct/1e0c27fc-8111-4325-8e61-c24c2f8124f7.json
deleted file mode 100644
index a8036aa01d51ff61de1feea65cca9c0f8a3885e7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-3b-a800m-instruct/1e0c27fc-8111-4325-8e61-c24c2f8124f7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-3b-a800m-instruct/1762652580.2092001",
- "retrieved_timestamp": "1762652580.2092009",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-3.1-3b-a800m-instruct",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-3.1-3b-a800m-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5516462984880118
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4009494521947192
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11404833836858005
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28859060402684567
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3486354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21476063829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GraniteMoeForCausalLM",
- "params_billions": 3.299
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-8b-base/10cbee10-0344-4da0-a26a-4298fd8f4d11.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-8b-base/10cbee10-0344-4da0-a26a-4298fd8f4d11.json
deleted file mode 100644
index 9b5e19d43d84ccd9d7faf98b4331a41b72121812..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-8b-base/10cbee10-0344-4da0-a26a-4298fd8f4d11.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-8b-base/1762652580.209538",
- "retrieved_timestamp": "1762652580.2095392",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-3.1-8b-base",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-3.1-8b-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4221033524381973
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4776956677111636
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09441087613293052
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3922291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3232214095744681
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GraniteForCausalLM",
- "params_billions": 8.171
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-8b-instruct/6d6b2e81-8b90-4703-aafb-40de92b3ede3.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-8b-instruct/6d6b2e81-8b90-4703-aafb-40de92b3ede3.json
deleted file mode 100644
index 8134ed056ab6c82d208763af520ec8f0cd2c99f4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.1-8b-instruct/6d6b2e81-8b90-4703-aafb-40de92b3ede3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.1-8b-instruct/1762652580.2098079",
- "retrieved_timestamp": "1762652580.2098088",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-3.1-8b-instruct",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-3.1-8b-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7207564816908026
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5364460433816018
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21978851963746224
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47070833333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3537234042553192
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GraniteForCausalLM",
- "params_billions": 8.171
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.2-2b-instruct/39fd9dc4-88e4-4b52-8527-c1ea692d8ca1.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.2-2b-instruct/39fd9dc4-88e4-4b52-8527-c1ea692d8ca1.json
deleted file mode 100644
index 5bd18834a88824a3a5643c22f6dc4f425e6beb8c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.2-2b-instruct/39fd9dc4-88e4-4b52-8527-c1ea692d8ca1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.2-2b-instruct/1762652580.2100549",
- "retrieved_timestamp": "1762652580.2100558",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-3.2-2b-instruct",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-3.2-2b-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6151688630611223
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43872707491212865
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14425981873111782
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3645729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2783410904255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GraniteForCausalLM",
- "params_billions": 2.534
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.2-8b-instruct/982accb5-ea5c-45bc-8cdd-08edf5e543a1.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.2-8b-instruct/982accb5-ea5c-45bc-8cdd-08edf5e543a1.json
deleted file mode 100644
index 20fcb7de4e48dc2923d76892ef439ad9a892b1a9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-3.2-8b-instruct/982accb5-ea5c-45bc-8cdd-08edf5e543a1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-3.2-8b-instruct/1762652580.210291",
- "retrieved_timestamp": "1762652580.2102919",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-3.2-8b-instruct",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-3.2-8b-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7274509412802475
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5401759656246116
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23791540785498488
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31543624161073824
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4561979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35123005319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GraniteForCausalLM",
- "params_billions": 8.171
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-7b-base/2d21a773-8f72-4b7d-ba94-80867127c54a.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-7b-base/2d21a773-8f72-4b7d-ba94-80867127c54a.json
deleted file mode 100644
index 5887bd6b9692542994d9e26bae71013227bbfcd8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-7b-base/2d21a773-8f72-4b7d-ba94-80867127c54a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-7b-base/1762652580.2106082",
- "retrieved_timestamp": "1762652580.210609",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-7b-base",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-7b-base"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24142719096441884
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34804372716106186
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.015861027190332326
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24580536912751677
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35548958333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18342752659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.738
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-7b-instruct/509f5b3a-6110-4757-a313-80181ecd3228.json b/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-7b-instruct/509f5b3a-6110-4757-a313-80181ecd3228.json
deleted file mode 100644
index 193bbcb40195e57e7ffd9236e2e1aa449e5b8f2f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm-granite/ibm-granite_granite-7b-instruct/509f5b3a-6110-4757-a313-80181ecd3228.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm-granite_granite-7b-instruct/1762652580.2108219",
- "retrieved_timestamp": "1762652580.2108219",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm-granite/granite-7b-instruct",
- "developer": "ibm-granite",
- "inference_platform": "unknown",
- "id": "ibm-granite/granite-7b-instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2972313461615181
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37229529603269523
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02039274924471299
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40199999999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2286402925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 6.738
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm/ibm_PowerLM-3b/f1eb3ba0-225e-49d5-9509-422702927c9f.json b/leaderboard_data/HFOpenLLMv2/ibm/ibm_PowerLM-3b/f1eb3ba0-225e-49d5-9509-422702927c9f.json
deleted file mode 100644
index 932ff336f5a83840474a30e8bd0b1196a6342051..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm/ibm_PowerLM-3b/f1eb3ba0-225e-49d5-9509-422702927c9f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm_PowerLM-3b/1762652580.205445",
- "retrieved_timestamp": "1762652580.205446",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm/PowerLM-3b",
- "developer": "ibm",
- "inference_platform": "unknown",
- "id": "ibm/PowerLM-3b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33212764354135915
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3679456724439114
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03625377643504532
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2751677852348993
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3562916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.20162898936170212
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "GraniteForCausalLM",
- "params_billions": 3.512
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ibm/ibm_merlinite-7b/7fdbc273-200d-4085-8a03-8f56cde4f2fc.json b/leaderboard_data/HFOpenLLMv2/ibm/ibm_merlinite-7b/7fdbc273-200d-4085-8a03-8f56cde4f2fc.json
deleted file mode 100644
index da47a34a19bf1a366d8de173c1cd32e50aa7be5a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ibm/ibm_merlinite-7b/7fdbc273-200d-4085-8a03-8f56cde4f2fc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ibm_merlinite-7b/1762652580.2057128",
- "retrieved_timestamp": "1762652580.205714",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ibm/merlinite-7b",
- "developer": "ibm",
- "inference_platform": "unknown",
- "id": "ibm/merlinite-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2498703440205322
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.50071326118705
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.02416918429003021
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44115624999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3068484042553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.15-02.10-RP/20c0d1f9-24b8-4993-82f1-d9889c18c56a.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.15-02.10-RP/20c0d1f9-24b8-4993-82f1-d9889c18c56a.json
deleted file mode 100644
index 58da457d8bbd5547c977804fb457a07af779ad98..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.15-02.10-RP/20c0d1f9-24b8-4993-82f1-d9889c18c56a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.15-02.10-RP/1762652580.211034",
- "retrieved_timestamp": "1762652580.211034",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.15-02.10-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.15-02.10-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5343355629729118
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4976384736188401
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05740181268882175
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43197916666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30659906914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.16-02.10-RP/824cb85d-e7a0-421a-994b-c0b178ab8e56.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.16-02.10-RP/824cb85d-e7a0-421a-994b-c0b178ab8e56.json
deleted file mode 100644
index f41be5756ce9e40fa3209ec8a39ba774355aa1ec..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.16-02.10-RP/824cb85d-e7a0-421a-994b-c0b178ab8e56.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.16-02.10-RP/1762652580.211284",
- "retrieved_timestamp": "1762652580.211284",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.16-02.10-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.16-02.10-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5069083365470286
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4945564313654156
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.433375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3067652925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.17-03.10-RP/2faf039c-9c8e-46db-8472-6b741c451bf1.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.17-03.10-RP/2faf039c-9c8e-46db-8472-6b741c451bf1.json
deleted file mode 100644
index 5fcb994744ed5924c36c66097c67cbc0b12be181..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.17-03.10-RP/2faf039c-9c8e-46db-8472-6b741c451bf1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.17-03.10-RP/1762652580.211494",
- "retrieved_timestamp": "1762652580.211495",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.17-03.10-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.17-03.10-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5123538876846767
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5006815748225494
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06117824773413897
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28187919463087246
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.433375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30851063829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.27-06.11-RP/314c9c7e-0c13-4f6b-be25-d2a2cbc25e9b.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.27-06.11-RP/314c9c7e-0c13-4f6b-be25-d2a2cbc25e9b.json
deleted file mode 100644
index 7512defe87e51190f0141f947ef9c3b35232acac..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.27-06.11-RP/314c9c7e-0c13-4f6b-be25-d2a2cbc25e9b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.27-06.11-RP/1762652580.211702",
- "retrieved_timestamp": "1762652580.211702",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.27-06.11-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.27-06.11-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49182059158588104
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5111654648230625
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05664652567975831
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43278125000000006
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3154089095744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.29-06.11-RP/b07e3d05-409f-498a-a324-82c4a592d4dc.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.29-06.11-RP/b07e3d05-409f-498a-a324-82c4a592d4dc.json
deleted file mode 100644
index 3443ccf9b466c7123f17217219541e5f018d1751..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.29-06.11-RP/b07e3d05-409f-498a-a324-82c4a592d4dc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.29-06.11-RP/1762652580.2119",
- "retrieved_timestamp": "1762652580.211901",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.29-06.11-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.29-06.11-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.486050346414181
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5087880173407883
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05664652567975831
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4458958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30925864361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.31-08.11-RP/1fc072c6-ad31-4151-8420-7402b565510d.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.31-08.11-RP/1fc072c6-ad31-4151-8420-7402b565510d.json
deleted file mode 100644
index 57cc3da2cc65180152a258af064ad9310efef9c0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.31-08.11-RP/1fc072c6-ad31-4151-8420-7402b565510d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.31-08.11-RP/1762652580.212094",
- "retrieved_timestamp": "1762652580.212095",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.31-08.11-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.31-08.11-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5145768782386291
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5032134100285419
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06117824773413897
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42766666666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3130817819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.32-10.11-RP/68e99fe4-634e-4462-b1db-d2d40814ff0b.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.32-10.11-RP/68e99fe4-634e-4462-b1db-d2d40814ff0b.json
deleted file mode 100644
index 4c606bc49384b1fa979dd5cbfe4c39c1a36871be..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.32-10.11-RP/68e99fe4-634e-4462-b1db-d2d40814ff0b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.32-10.11-RP/1762652580.2122939",
- "retrieved_timestamp": "1762652580.2122948",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.32-10.11-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.32-10.11-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49154576523623983
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5047695597611622
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4382083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3100066489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.34b-14.11-RP/ed2a47c3-06c7-451b-94cd-8cd42be2ca9c.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.34b-14.11-RP/ed2a47c3-06c7-451b-94cd-8cd42be2ca9c.json
deleted file mode 100644
index bbde5edbc5f3fe61c48ab24775569c44a5dce0c1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.34b-14.11-RP/ed2a47c3-06c7-451b-94cd-8cd42be2ca9c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.34b-14.11-RP/1762652580.2124958",
- "retrieved_timestamp": "1762652580.212497",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.34b-14.11-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.34b-14.11-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47620868185303883
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5067195329696937
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0649546827794562
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4419895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3125
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.34n-14.11-RP/8c6aae5b-6a9b-47fb-908b-6b51159cc9b2.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.34n-14.11-RP/8c6aae5b-6a9b-47fb-908b-6b51159cc9b2.json
deleted file mode 100644
index 8b2f923f2f7b1198a7c3d68a2b4b6ff0ff9a8695..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.34n-14.11-RP/8c6aae5b-6a9b-47fb-908b-6b51159cc9b2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.34n-14.11-RP/1762652580.2127092",
- "retrieved_timestamp": "1762652580.21271",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.34n-14.11-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.34n-14.11-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47865663107222167
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5091090160356474
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07250755287009064
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4379583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31241688829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.37-18.11-RP/774c0461-5e81-436a-9347-7a4cc15ca019.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.37-18.11-RP/774c0461-5e81-436a-9347-7a4cc15ca019.json
deleted file mode 100644
index e6fe45dfbc35845ada946c8bfe2136f326ca52d5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.37-18.11-RP/774c0461-5e81-436a-9347-7a4cc15ca019.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.37-18.11-RP/1762652580.212915",
- "retrieved_timestamp": "1762652580.212916",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.37-18.11-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.37-18.11-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4972162750391184
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5084310833712639
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06419939577039276
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43392708333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3143284574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.38-19.11-RP/4d13aaf7-a18d-4bad-ab22-8e08c3f2e16a.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.38-19.11-RP/4d13aaf7-a18d-4bad-ab22-8e08c3f2e16a.json
deleted file mode 100644
index 22807c6c24ca60b872adaa50405535d6218c49ef..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.38-19.11-RP/4d13aaf7-a18d-4bad-ab22-8e08c3f2e16a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.38-19.11-RP/1762652580.213116",
- "retrieved_timestamp": "1762652580.213117",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.38-19.11-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.38-19.11-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44033830237104216
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.510108216407024
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43671875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31399601063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.39-19.11-RP/780c711f-774b-499e-881e-25dba76273a1.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.39-19.11-RP/780c711f-774b-499e-881e-25dba76273a1.json
deleted file mode 100644
index db9ea9a53e812542842cf9863c9c7a58dde1ba3b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.39-19.11-RP/780c711f-774b-499e-881e-25dba76273a1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.39-19.11-RP/1762652580.2133162",
- "retrieved_timestamp": "1762652580.2133162",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.39-19.11-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.39-19.11-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47565902915375646
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5092985137525424
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04984894259818731
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4341458333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3126662234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.40-20.11-RP/5220bee5-74d3-4730-9fee-4ca488e1a37e.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.40-20.11-RP/5220bee5-74d3-4730-9fee-4ca488e1a37e.json
deleted file mode 100644
index 50573a0e7cbd07bad4bc159363f1f9665f38b305..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.40-20.11-RP/5220bee5-74d3-4730-9fee-4ca488e1a37e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.40-20.11-RP/1762652580.2136111",
- "retrieved_timestamp": "1762652580.213614",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.40-20.11-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.40-20.11-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4762585495374495
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.509308586549064
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06419939577039276
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44459374999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30992353723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.41-22.11-RP/43a30cf0-ccb5-46ce-b520-55ee110002c9.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.41-22.11-RP/43a30cf0-ccb5-46ce-b520-55ee110002c9.json
deleted file mode 100644
index 529cd444b6814466e05a8314ab0b357005aa170b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.41-22.11-RP/43a30cf0-ccb5-46ce-b520-55ee110002c9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.41-22.11-RP/1762652580.213999",
- "retrieved_timestamp": "1762652580.2140002",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.41-22.11-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.41-22.11-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4620451513096362
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4723318624775949
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.030966767371601207
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45597916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26180186170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.50-16.01-RP/37602e25-bd23-462a-8566-38f3b0fee63d.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.50-16.01-RP/37602e25-bd23-462a-8566-38f3b0fee63d.json
deleted file mode 100644
index e7980420a32942a8eabe686eaa5c59f010098d80..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.50-16.01-RP/37602e25-bd23-462a-8566-38f3b0fee63d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.50-16.01-RP/1762652580.214273",
- "retrieved_timestamp": "1762652580.214274",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.50-16.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.50-16.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43848987353555235
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49804682910006176
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04682779456193353
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4380520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30693151595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.50.1-16.01-RP/fde6323e-0bfe-4ec9-aa86-4371bbd1645a.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.50.1-16.01-RP/fde6323e-0bfe-4ec9-aa86-4371bbd1645a.json
deleted file mode 100644
index 9e2a1d58ba32deec7916f2843c77cc966a27ceea..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.50.1-16.01-RP/fde6323e-0bfe-4ec9-aa86-4371bbd1645a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.50.1-16.01-RP/1762652580.214615",
- "retrieved_timestamp": "1762652580.214617",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.50.1-16.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.50.1-16.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4829031414424837
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5107472937598788
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06117824773413897
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43274999999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3132480053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.51-16.01-RP/7a137ac4-8445-4c1a-9203-abc5f4131213.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.51-16.01-RP/7a137ac4-8445-4c1a-9203-abc5f4131213.json
deleted file mode 100644
index 3e4e549361b128bf0db829059719bf237ac7b289..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.51-16.01-RP/7a137ac4-8445-4c1a-9203-abc5f4131213.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.51-16.01-RP/1762652580.214901",
- "retrieved_timestamp": "1762652580.214902",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.51-16.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.51-16.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4430610779398662
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5044464794803141
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44366666666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30601728723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.51.1-16.01-RP/859a9706-f73b-4426-9c5a-052625d62f5b.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.51.1-16.01-RP/859a9706-f73b-4426-9c5a-052625d62f5b.json
deleted file mode 100644
index f9bab569027631c4ab03bb5310a8429565ece998..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.51.1-16.01-RP/859a9706-f73b-4426-9c5a-052625d62f5b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.51.1-16.01-RP/1762652580.215148",
- "retrieved_timestamp": "1762652580.2151492",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.51.1-16.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.51.1-16.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4573243438520902
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5121083021452105
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06419939577039276
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43938541666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104222074468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.52-16.01-RP/72412b78-cc3e-4652-9034-32c72aee5796.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.52-16.01-RP/72412b78-cc3e-4652-9034-32c72aee5796.json
deleted file mode 100644
index 6a1fed871fc9ccfb16d0aa117ad59712c61af6b5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.52-16.01-RP/72412b78-cc3e-4652-9034-32c72aee5796.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.52-16.01-RP/1762652580.21541",
- "retrieved_timestamp": "1762652580.215412",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.52-16.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.52-16.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4503051902285935
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.504677500406742
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05060422960725076
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43960416666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3080119680851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.52.1-16.01-RP/6bfbd9d6-b376-4169-8e6a-2c3210040e97.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.52.1-16.01-RP/6bfbd9d6-b376-4169-8e6a-2c3210040e97.json
deleted file mode 100644
index fdaa843bda7fed8ff52fc4461df66a6938033616..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.52.1-16.01-RP/6bfbd9d6-b376-4169-8e6a-2c3210040e97.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.52.1-16.01-RP/1762652580.21567",
- "retrieved_timestamp": "1762652580.215671",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.52.1-16.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.52.1-16.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45492626231731803
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.510648341878344
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06268882175226587
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43938541666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31050531914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.53-16.01-RP/6415adfc-35a9-480c-a740-dac02725c8f0.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.53-16.01-RP/6415adfc-35a9-480c-a740-dac02725c8f0.json
deleted file mode 100644
index 01c39db1bc229a1dba19d9ade7e4ad0198a84cc6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.53-16.01-RP/6415adfc-35a9-480c-a740-dac02725c8f0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.53-16.01-RP/1762652580.215963",
- "retrieved_timestamp": "1762652580.2159638",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.53-16.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.53-16.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4741352943523185
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5101675133484068
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0634441087613293
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43274999999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31299867021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.54-17.01-RP/94d01e56-d7d5-4680-b577-ebcc0198ca0c.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.54-17.01-RP/94d01e56-d7d5-4680-b577-ebcc0198ca0c.json
deleted file mode 100644
index adbd2bb7cce5883e3cab8f108acae8d3fedc2656..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.54-17.01-RP/94d01e56-d7d5-4680-b577-ebcc0198ca0c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.54-17.01-RP/1762652580.2162719",
- "retrieved_timestamp": "1762652580.2162728",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.54-17.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.54-17.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4378903531518593
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4853448809638454
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04078549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48741666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.23262965425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.55-17.01-RP/a2de66f0-bbd1-40b9-95d3-74e0335b853b.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.55-17.01-RP/a2de66f0-bbd1-40b9-95d3-74e0335b853b.json
deleted file mode 100644
index b74fe190beb28043e4cf0d1f01f57521977205ba..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.55-17.01-RP/a2de66f0-bbd1-40b9-95d3-74e0335b853b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.55-17.01-RP/1762652580.2165911",
- "retrieved_timestamp": "1762652580.2165918",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.55-17.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.55-17.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.496067101956143
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5076567509425027
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4725
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2657912234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.57-17.01-RP/8d99bf0e-7db0-46f5-96a0-7f977b8cf5f2.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.57-17.01-RP/8d99bf0e-7db0-46f5-96a0-7f977b8cf5f2.json
deleted file mode 100644
index 309d5e462f1476da55ae187dff07a1dd15683ad7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.57-17.01-RP/8d99bf0e-7db0-46f5-96a0-7f977b8cf5f2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.57-17.01-RP/1762652580.216822",
- "retrieved_timestamp": "1762652580.216822",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.57-17.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.57-17.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5151763986223221
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5064080420224116
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0513595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46859375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26512632978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.60-18.01-RP/b5c42995-f1fe-4a7e-90c1-d8fb00cba116.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.60-18.01-RP/b5c42995-f1fe-4a7e-90c1-d8fb00cba116.json
deleted file mode 100644
index 08d1470285f794fcff0981f4f5c29fb83c966b55..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.60-18.01-RP/b5c42995-f1fe-4a7e-90c1-d8fb00cba116.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.60-18.01-RP/1762652580.217043",
- "retrieved_timestamp": "1762652580.2170439",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.60-18.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.60-18.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5374329002601985
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5093724614980669
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05362537764350453
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46704166666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28366023936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.60.1-18.01-RP/8a14ed64-1408-469e-ab8d-05c897904d20.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.60.1-18.01-RP/8a14ed64-1408-469e-ab8d-05c897904d20.json
deleted file mode 100644
index 8e0e391d20a405cd8ce806397d541bab38d3c05a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.60.1-18.01-RP/8a14ed64-1408-469e-ab8d-05c897904d20.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.60.1-18.01-RP/1762652580.217258",
- "retrieved_timestamp": "1762652580.217259",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.60.1-18.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.60.1-18.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5187735209244804
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5119675522804026
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04607250755287009
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4497708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2913896276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.61-18.01-RP/1c166a10-c176-42c7-9421-750e170f5706.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.61-18.01-RP/1c166a10-c176-42c7-9421-750e170f5706.json
deleted file mode 100644
index e4e55fdfe45d02696d1353805053c0a2edb53195..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.61-18.01-RP/1c166a10-c176-42c7-9421-750e170f5706.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.61-18.01-RP/1762652580.2174668",
- "retrieved_timestamp": "1762652580.2174678",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.61-18.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.61-18.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5441273598496433
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5104839613346842
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04682779456193353
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4697395833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27086103723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.62-18.01-RP/0c5bb530-f59b-4097-8a79-9e4f524385a2.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.62-18.01-RP/0c5bb530-f59b-4097-8a79-9e4f524385a2.json
deleted file mode 100644
index c786d5edbc37de9565220044945136f9b707709d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.62-18.01-RP/0c5bb530-f59b-4097-8a79-9e4f524385a2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.62-18.01-RP/1762652580.21767",
- "retrieved_timestamp": "1762652580.217671",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.62-18.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.62-18.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.536733644507684
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5103327208197285
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05740181268882175
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4537708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28773271276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.62.1-24.01-RP/26ba869e-ae3b-44ef-a215-f94e4e4cb1fc.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.62.1-24.01-RP/26ba869e-ae3b-44ef-a215-f94e4e4cb1fc.json
deleted file mode 100644
index 60bbbb401da67c5e7946600d47646b02577286c2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.62.1-24.01-RP/26ba869e-ae3b-44ef-a215-f94e4e4cb1fc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.62.1-24.01-RP/1762652580.2178729",
- "retrieved_timestamp": "1762652580.2178729",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.62.1-24.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.62.1-24.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5181740005407873
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5108967760246949
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.055891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45510416666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28706781914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.64-24.01-RP/d7313786-f553-454e-b2c8-62a0162c9339.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.64-24.01-RP/d7313786-f553-454e-b2c8-62a0162c9339.json
deleted file mode 100644
index 2cb41ea4fa5eca54878e8af78d9d2c52255614e3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.64-24.01-RP/d7313786-f553-454e-b2c8-62a0162c9339.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.64-24.01-RP/1762652580.218076",
- "retrieved_timestamp": "1762652580.218076",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.64-24.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.64-24.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5440774921652327
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5059610114856247
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06268882175226587
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4620208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29330119680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.64.1-24.01-RP/359daeb1-3546-473f-801b-c9942fd010aa.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.64.1-24.01-RP/359daeb1-3546-473f-801b-c9942fd010aa.json
deleted file mode 100644
index 519815cebdb7717911e4a7a3c0bac1352cfc6dc7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.64.1-24.01-RP/359daeb1-3546-473f-801b-c9942fd010aa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.64.1-24.01-RP/1762652580.218272",
- "retrieved_timestamp": "1762652580.218272",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.64.1-24.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.64.1-24.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5446770125489258
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5059610114856247
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06268882175226587
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4620208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29330119680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.65-25.01-RP/fa5d2148-c45b-4266-a6a0-11b471273f75.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.65-25.01-RP/fa5d2148-c45b-4266-a6a0-11b471273f75.json
deleted file mode 100644
index 141d3c066bf44b2128b1512a28441d5d5783fbba..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.65-25.01-RP/fa5d2148-c45b-4266-a6a0-11b471273f75.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.65-25.01-RP/1762652580.2184708",
- "retrieved_timestamp": "1762652580.218472",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.65-25.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.65-25.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5029366525264077
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5095976254774931
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0649546827794562
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4339583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29970079787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.66-25.01-RP/b619dad2-fcb2-45ab-b603-ae1da3916eb7.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.66-25.01-RP/b619dad2-fcb2-45ab-b603-ae1da3916eb7.json
deleted file mode 100644
index d861016485d0db71dce04d0b8d64fbb09cdfab96..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.66-25.01-RP/b619dad2-fcb2-45ab-b603-ae1da3916eb7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.66-25.01-RP/1762652580.2186701",
- "retrieved_timestamp": "1762652580.2186701",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.66-25.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.66-25.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.532487134137422
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5128983540188711
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44344791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3039394946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.67-25.01-RP/cf0a4a2d-a104-43cf-ac01-66250e880ff0.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.67-25.01-RP/cf0a4a2d-a104-43cf-ac01-66250e880ff0.json
deleted file mode 100644
index a22ee2bcd08cae5689fcb4c6ffa88af685282fe0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.67-25.01-RP/cf0a4a2d-a104-43cf-ac01-66250e880ff0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.67-25.01-RP/1762652580.21887",
- "retrieved_timestamp": "1762652580.218871",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.67-25.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.67-25.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.536134124123991
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5112894150790012
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07477341389728097
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42788541666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30967420212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.68-25.01-RP/dd7cb16f-0752-4639-aa99-90b9be448295.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.68-25.01-RP/dd7cb16f-0752-4639-aa99-90b9be448295.json
deleted file mode 100644
index cc1cba248d6e48dfa0dac3734a6b5a869fee12a3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.68-25.01-RP/dd7cb16f-0752-4639-aa99-90b9be448295.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.68-25.01-RP/1762652580.2190669",
- "retrieved_timestamp": "1762652580.2190678",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.68-25.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.68-25.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5513714721383707
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5130058094823416
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07250755287009064
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44456249999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011968085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.69-25.01-RP/643da0d0-176a-40dd-b096-5aac8de827e9.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.69-25.01-RP/643da0d0-176a-40dd-b096-5aac8de827e9.json
deleted file mode 100644
index a8a3db1388fe4718397666c78dbde3804e383ec1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.69-25.01-RP/643da0d0-176a-40dd-b096-5aac8de827e9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.69-25.01-RP/1762652580.219263",
- "retrieved_timestamp": "1762652580.219264",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.69-25.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.69-25.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5437527981311808
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5097683665599672
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05664652567975831
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4485625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29654255319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.7-29.09-RP/9c6cf7a1-1a17-4070-9ce3-633461334f42.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.7-29.09-RP/9c6cf7a1-1a17-4070-9ce3-633461334f42.json
deleted file mode 100644
index f38ebd7de40bad7bb78f214fcbef2225ffc3f40d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.7-29.09-RP/9c6cf7a1-1a17-4070-9ce3-633461334f42.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.7-29.09-RP/1762652580.2194638",
- "retrieved_timestamp": "1762652580.219465",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.7-29.09-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.7-29.09-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5175744801570943
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5047661992357916
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4237916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3126662234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.70-25.01-RP/e109acd0-c7e3-4a9f-8e06-c428b95acc83.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.70-25.01-RP/e109acd0-c7e3-4a9f-8e06-c428b95acc83.json
deleted file mode 100644
index 19251c603825da95f28a58edbc573412b907a8dd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.70-25.01-RP/e109acd0-c7e3-4a9f-8e06-c428b95acc83.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.70-25.01-RP/1762652580.2196732",
- "retrieved_timestamp": "1762652580.219674",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.70-25.01-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.70-25.01-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.549797869652522
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.513632436415875
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05966767371601209
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45119791666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2996176861702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.70.1-01.02-RP/ee088f70-5734-4951-8bc0-e0579a053fd2.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.70.1-01.02-RP/ee088f70-5734-4951-8bc0-e0579a053fd2.json
deleted file mode 100644
index a2c0cc19811348ddf34abeb915d48b2b9bac131d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.70.1-01.02-RP/ee088f70-5734-4951-8bc0-e0579a053fd2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.70.1-01.02-RP/1762652580.219877",
- "retrieved_timestamp": "1762652580.219877",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.70.1-01.02-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.70.1-01.02-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5069582042314393
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5059798926804829
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.033987915407854986
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4599166666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2748503989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.73-01.02-RP/ba7bf09f-b7a1-4fd4-b262-4929a81da34a.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.73-01.02-RP/ba7bf09f-b7a1-4fd4-b262-4929a81da34a.json
deleted file mode 100644
index 479feec8059a8fee338766d2a7ad89a458e54020..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.73-01.02-RP/ba7bf09f-b7a1-4fd4-b262-4929a81da34a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.73-01.02-RP/1762652580.220075",
- "retrieved_timestamp": "1762652580.220076",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.73-01.02-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.73-01.02-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.529164838184905
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5103425890792322
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03851963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46639583333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27019614361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.74-02.02-RP/7470c7d4-80fe-4e88-a695-c628f9ed3682.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.74-02.02-RP/7470c7d4-80fe-4e88-a695-c628f9ed3682.json
deleted file mode 100644
index c335ab5ec4b36472fb0c3990e81adcdda8cdef3a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.74-02.02-RP/7470c7d4-80fe-4e88-a695-c628f9ed3682.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.74-02.02-RP/1762652580.220269",
- "retrieved_timestamp": "1762652580.2202702",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.74-02.02-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.74-02.02-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2935344884905384
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4646134965075064
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0015105740181268882
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42804166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.21434507978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.76-02.02-RP/701743bb-1ddf-4810-824a-38959d4a0e02.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.76-02.02-RP/701743bb-1ddf-4810-824a-38959d4a0e02.json
deleted file mode 100644
index 1328b8b45a6327fa39565b6eb6f01f6d0a161a6f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.76-02.02-RP/701743bb-1ddf-4810-824a-38959d4a0e02.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.76-02.02-RP/1762652580.220735",
- "retrieved_timestamp": "1762652580.220737",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.76-02.02-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.76-02.02-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45290274250100837
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5085610407875073
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.014350453172205438
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28691275167785235
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43616666666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2652094414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.77-02.02-RP/0eebefc6-138f-4af5-a8b6-a35c798a38cb.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.77-02.02-RP/0eebefc6-138f-4af5-a8b6-a35c798a38cb.json
deleted file mode 100644
index 336e66630a526bc4b8244f5c9f96238745adaa74..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.77-02.02-RP/0eebefc6-138f-4af5-a8b6-a35c798a38cb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.77-02.02-RP/1762652580.221007",
- "retrieved_timestamp": "1762652580.2210078",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.77-02.02-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.77-02.02-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5309633993359841
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5109257300160749
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03927492447129909
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4765
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29986702127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.78-02.02-RP/ec943fa1-b138-46e8-b1ae-c9a476c73ed1.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.78-02.02-RP/ec943fa1-b138-46e8-b1ae-c9a476c73ed1.json
deleted file mode 100644
index 284dbd106f3c0ce3272eeaa9e12fceeae82aee85..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.78-02.02-RP/ec943fa1-b138-46e8-b1ae-c9a476c73ed1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.78-02.02-RP/1762652580.221266",
- "retrieved_timestamp": "1762652580.221267",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.78-02.02-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.78-02.02-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.405292401937969
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5002126961381052
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04380664652567976
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.468625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2954621010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.80-03.02-RP/847b4e14-a07c-45ed-b2eb-ecea0f80147b.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.80-03.02-RP/847b4e14-a07c-45ed-b2eb-ecea0f80147b.json
deleted file mode 100644
index 093263aafef5d5db109d7c7e6ef0335a133668a6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_Ice0.80-03.02-RP/847b4e14-a07c-45ed-b2eb-ecea0f80147b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_Ice0.80-03.02-RP/1762652580.2214909",
- "retrieved_timestamp": "1762652580.221492",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/Ice0.80-03.02-RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/Ice0.80-03.02-RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5516462984880118
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5097962218679292
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.055891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2785234899328859
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4923125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2912234042553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceCocoaRP-7b/5427828d-b53d-4e44-82ed-df6a9c0f9a47.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceCocoaRP-7b/5427828d-b53d-4e44-82ed-df6a9c0f9a47.json
deleted file mode 100644
index 251f6ece17acf5695d459dbea3aed2a4d15e4fe5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceCocoaRP-7b/5427828d-b53d-4e44-82ed-df6a9c0f9a47.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_IceCocoaRP-7b/1762652580.2217228",
- "retrieved_timestamp": "1762652580.2217238",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/IceCocoaRP-7b",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/IceCocoaRP-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4962421929369628
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4937902147076245
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05740181268882175
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4197916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3098404255319149
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceCoffeeRP-7b/bf5e2b11-79ce-49ed-947b-fb34110a3802.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceCoffeeRP-7b/bf5e2b11-79ce-49ed-947b-fb34110a3802.json
deleted file mode 100644
index 57290145a04875d318b660719a65b1dc58e0803b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceCoffeeRP-7b/bf5e2b11-79ce-49ed-947b-fb34110a3802.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_IceCoffeeRP-7b/1762652580.2220101",
- "retrieved_timestamp": "1762652580.2220109",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/IceCoffeeRP-7b",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/IceCoffeeRP-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4959174989029109
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48887216244327214
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.054380664652567974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4159791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2974567819148936
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrinkByFrankensteinV3RP/39325b65-ad12-44ef-a1bf-ffe9e870ced8.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrinkByFrankensteinV3RP/39325b65-ad12-44ef-a1bf-ffe9e870ced8.json
deleted file mode 100644
index ce1ec6f1cfe4db07e62020a5b34af7abf5b2ed69..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrinkByFrankensteinV3RP/39325b65-ad12-44ef-a1bf-ffe9e870ced8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_IceDrinkByFrankensteinV3RP/1762652580.222236",
- "retrieved_timestamp": "1762652580.222236",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/IceDrinkByFrankensteinV3RP",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/IceDrinkByFrankensteinV3RP"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4974911013887596
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4832523723413275
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05060422960725076
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26174496644295303
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4253125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.292719414893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrinkNameGoesHereRP-7b-Model_Stock/b0aaf6e9-ffe3-4de9-b3f5-c33d52b59ed2.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrinkNameGoesHereRP-7b-Model_Stock/b0aaf6e9-ffe3-4de9-b3f5-c33d52b59ed2.json
deleted file mode 100644
index 79ae461d0c2550663995921dd01bef9c7c0a9d6b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrinkNameGoesHereRP-7b-Model_Stock/b0aaf6e9-ffe3-4de9-b3f5-c33d52b59ed2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_IceDrinkNameGoesHereRP-7b-Model_Stock/1762652580.2224698",
- "retrieved_timestamp": "1762652580.2224698",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/IceDrinkNameGoesHereRP-7b-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49684171332065585
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46578646938927254
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04078549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2684563758389262
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4067395833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2816655585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrinkNameNotFoundRP-7b-Model_Stock/f0e6fa5e-20c2-407d-8301-70d86cb1a51f.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrinkNameNotFoundRP-7b-Model_Stock/f0e6fa5e-20c2-407d-8301-70d86cb1a51f.json
deleted file mode 100644
index 94fa3521b766edcbf854b602676242cc006b00f2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrinkNameNotFoundRP-7b-Model_Stock/f0e6fa5e-20c2-407d-8301-70d86cb1a51f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_IceDrinkNameNotFoundRP-7b-Model_Stock/1762652580.2227032",
- "retrieved_timestamp": "1762652580.2227042",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/IceDrinkNameNotFoundRP-7b-Model_Stock"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5130032757527804
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.502625425089929
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27768456375838924
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4371875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3064328457446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrunkCherryRP-7b/c0e3f4ee-52dc-45c3-844a-8cc4e4520f24.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrunkCherryRP-7b/c0e3f4ee-52dc-45c3-844a-8cc4e4520f24.json
deleted file mode 100644
index 920bb78492463fcba3cb0c6cfeabba3cc2784ee9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrunkCherryRP-7b/c0e3f4ee-52dc-45c3-844a-8cc4e4520f24.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_IceDrunkCherryRP-7b/1762652580.222923",
- "retrieved_timestamp": "1762652580.222924",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/IceDrunkCherryRP-7b",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/IceDrunkCherryRP-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48982255969715904
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4846629039263151
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06117824773413897
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27684563758389263
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4291875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3009474734042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrunkenCherryRP-7b/9d1e6b55-aa7c-4fea-8a77-92795c0ee60a.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrunkenCherryRP-7b/9d1e6b55-aa7c-4fea-8a77-92795c0ee60a.json
deleted file mode 100644
index 9ac74b51a02e8b753f8fd0bc04226a218f1770e4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceDrunkenCherryRP-7b/9d1e6b55-aa7c-4fea-8a77-92795c0ee60a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_IceDrunkenCherryRP-7b/1762652580.223197",
- "retrieved_timestamp": "1762652580.223207",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/IceDrunkenCherryRP-7b",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/IceDrunkenCherryRP-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4762585495374495
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.509308586549064
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06419939577039276
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44459374999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30992353723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceEspressoRPv2-7b/ade14c35-442b-4a0a-8345-99b7b58dc194.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceEspressoRPv2-7b/ade14c35-442b-4a0a-8345-99b7b58dc194.json
deleted file mode 100644
index 5deea65a5902e67812fc7378f0177dcee92827b7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceEspressoRPv2-7b/ade14c35-442b-4a0a-8345-99b7b58dc194.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_IceEspressoRPv2-7b/1762652580.223459",
- "retrieved_timestamp": "1762652580.2234602",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/IceEspressoRPv2-7b",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/IceEspressoRPv2-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4977160600539901
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5054890156350785
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.061933534743202415
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28942953020134227
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43306249999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3061003989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceLemonTeaRP-32k-7b/fd90b65b-7b6f-4ca2-93e3-59486c0ee070.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceLemonTeaRP-32k-7b/fd90b65b-7b6f-4ca2-93e3-59486c0ee070.json
deleted file mode 100644
index cfa6c21f9fdcd91d819fbd5b8f3c453b7a8c44c1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceLemonTeaRP-32k-7b/fd90b65b-7b6f-4ca2-93e3-59486c0ee070.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_IceLemonTeaRP-32k-7b/1762652580.2236779",
- "retrieved_timestamp": "1762652580.223679",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/IceLemonTeaRP-32k-7b",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/IceLemonTeaRP-32k-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5212214701436633
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49973852418379305
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.054380664652567974
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42903125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3067652925531915
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceMartiniRP-7b/210bea5c-35de-4bd6-93db-871704add0d6.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceMartiniRP-7b/210bea5c-35de-4bd6-93db-871704add0d6.json
deleted file mode 100644
index 78ea253608bde70f24d8ba9f639d59c4e4ce7844..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceMartiniRP-7b/210bea5c-35de-4bd6-93db-871704add0d6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_IceMartiniRP-7b/1762652580.223922",
- "retrieved_timestamp": "1762652580.223923",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/IceMartiniRP-7b",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/IceMartiniRP-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5044603873278457
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4972421837639585
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06646525679758308
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4344895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3073470744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceNalyvkaRP-7b/95dd235d-6930-48fd-8594-5acb0110be29.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceNalyvkaRP-7b/95dd235d-6930-48fd-8594-5acb0110be29.json
deleted file mode 100644
index 41d711c9258ee0f69c9db6a080b84abf971f0ced..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceNalyvkaRP-7b/95dd235d-6930-48fd-8594-5acb0110be29.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_IceNalyvkaRP-7b/1762652580.224114",
- "retrieved_timestamp": "1762652580.224115",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/IceNalyvkaRP-7b",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/IceNalyvkaRP-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.549797869652522
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.513632436415875
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05966767371601209
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45119791666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2996176861702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeRP-7b/67e351c8-6cca-4982-86e9-e774786c6862.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeRP-7b/67e351c8-6cca-4982-86e9-e774786c6862.json
deleted file mode 100644
index fc300ec4c9ce4a6dae857334ea688ac1c466a2cc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeRP-7b/67e351c8-6cca-4982-86e9-e774786c6862.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_IceSakeRP-7b/1762652580.2243059",
- "retrieved_timestamp": "1762652580.224307",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/IceSakeRP-7b",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/IceSakeRP-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5227950726295119
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5119287057484642
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0634441087613293
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28523489932885904
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41300000000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3176529255319149
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeV4RP-7b/93b5850f-74d0-45cd-977e-5bf6e4dc5d8d.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeV4RP-7b/93b5850f-74d0-45cd-977e-5bf6e4dc5d8d.json
deleted file mode 100644
index 9d46f9a0d3f8faca2ad111be00d2c7c6dd9d3e49..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeV4RP-7b/93b5850f-74d0-45cd-977e-5bf6e4dc5d8d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_IceSakeV4RP-7b/1762652580.224551",
- "retrieved_timestamp": "1762652580.224552",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/IceSakeV4RP-7b",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/IceSakeV4RP-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4634192830578421
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4929557826908731
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.055891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40819791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31025598404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeV6RP-7b/e9ebbcbf-81d5-494b-95a1-4e79feb42c40.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeV6RP-7b/e9ebbcbf-81d5-494b-95a1-4e79feb42c40.json
deleted file mode 100644
index 97d193f322f3bb83cdfc8643d20d94817fa26dd0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeV6RP-7b/e9ebbcbf-81d5-494b-95a1-4e79feb42c40.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_IceSakeV6RP-7b/1762652580.224776",
- "retrieved_timestamp": "1762652580.224777",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/IceSakeV6RP-7b",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/IceSakeV6RP-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5032613465604596
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49760336362566354
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.061933534743202415
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42001041666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3093417553191489
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeV8RP-7b/dbeb9a8a-53c5-472b-a4b1-1aa0582f8486.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeV8RP-7b/dbeb9a8a-53c5-472b-a4b1-1aa0582f8486.json
deleted file mode 100644
index af0564a96ecf0acdc41a4511ad4b2a7f7e5e4bfd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceSakeV8RP-7b/dbeb9a8a-53c5-472b-a4b1-1aa0582f8486.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_IceSakeV8RP-7b/1762652580.2249868",
- "retrieved_timestamp": "1762652580.224988",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/IceSakeV8RP-7b",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/IceSakeV8RP-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6085741388404988
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48847141337960176
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05966767371601209
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.276006711409396
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3992708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.301030585106383
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceTea21EnergyDrinkRPV13-DPOv3.5/f4d3a112-d529-48f8-a99e-85e9eb02e0c1.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceTea21EnergyDrinkRPV13-DPOv3.5/f4d3a112-d529-48f8-a99e-85e9eb02e0c1.json
deleted file mode 100644
index 45a9602adfa20283638f37eee9221e7abb0835d2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceTea21EnergyDrinkRPV13-DPOv3.5/f4d3a112-d529-48f8-a99e-85e9eb02e0c1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_IceTea21EnergyDrinkRPV13-DPOv3.5/1762652580.2254012",
- "retrieved_timestamp": "1762652580.225402",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3.5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48709978412833504
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4399660013109026
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.03625377643504532
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28439597315436244
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39641666666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.24983377659574468
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceTea21EnergyDrinkRPV13-DPOv3/4b4a9630-c942-445e-b396-4a988d489aa7.json b/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceTea21EnergyDrinkRPV13-DPOv3/4b4a9630-c942-445e-b396-4a988d489aa7.json
deleted file mode 100644
index 87712986f2c7be750f3ae5a1e9fbdd5e194f42c2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/icefog72/icefog72_IceTea21EnergyDrinkRPV13-DPOv3/4b4a9630-c942-445e-b396-4a988d489aa7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/icefog72_IceTea21EnergyDrinkRPV13-DPOv3/1762652580.225198",
- "retrieved_timestamp": "1762652580.2251992",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3",
- "developer": "icefog72",
- "inference_platform": "unknown",
- "id": "icefog72/IceTea21EnergyDrinkRPV13-DPOv3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5263423272472595
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5019587584232624
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0581570996978852
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2835570469798658
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4371875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30560172872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 7.242
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/ilsp/ilsp_Llama-Krikri-8B-Instruct/592bd629-d0bf-48b0-83c6-abfa3731fd14.json b/leaderboard_data/HFOpenLLMv2/ilsp/ilsp_Llama-Krikri-8B-Instruct/592bd629-d0bf-48b0-83c6-abfa3731fd14.json
deleted file mode 100644
index f8bc45a983d921eae60795add7579009b9218c0c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/ilsp/ilsp_Llama-Krikri-8B-Instruct/592bd629-d0bf-48b0-83c6-abfa3731fd14.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/ilsp_Llama-Krikri-8B-Instruct/1762652580.225861",
- "retrieved_timestamp": "1762652580.225861",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "ilsp/Llama-Krikri-8B-Instruct",
- "developer": "ilsp",
- "inference_platform": "unknown",
- "id": "ilsp/Llama-Krikri-8B-Instruct"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.6078748830879843
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.504664191645287
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11782477341389729
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4079791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3312832446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.202
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/inflatebot/inflatebot_MN-12B-Mag-Mell-R1/43f7613d-bd9f-480d-a2ed-dcabf3169944.json b/leaderboard_data/HFOpenLLMv2/inflatebot/inflatebot_MN-12B-Mag-Mell-R1/43f7613d-bd9f-480d-a2ed-dcabf3169944.json
deleted file mode 100644
index c8d7e5e82caed43ba8ec0ef90b5d41581f52d5b5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/inflatebot/inflatebot_MN-12B-Mag-Mell-R1/43f7613d-bd9f-480d-a2ed-dcabf3169944.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/inflatebot_MN-12B-Mag-Mell-R1/1762652580.2261078",
- "retrieved_timestamp": "1762652580.226109",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "inflatebot/MN-12B-Mag-Mell-R1",
- "developer": "inflatebot",
- "inference_platform": "unknown",
- "id": "inflatebot/MN-12B-Mag-Mell-R1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46129602787271107
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5303854975434981
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1299093655589124
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40022916666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34383311170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/informatiker/informatiker_Qwen2-7B-Instruct-abliterated/be1ab009-3aa6-43da-8b8e-11e5287a0370.json b/leaderboard_data/HFOpenLLMv2/informatiker/informatiker_Qwen2-7B-Instruct-abliterated/be1ab009-3aa6-43da-8b8e-11e5287a0370.json
deleted file mode 100644
index 5cec04200724cb36e2854d02b545327629b2a670..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/informatiker/informatiker_Qwen2-7B-Instruct-abliterated/be1ab009-3aa6-43da-8b8e-11e5287a0370.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/informatiker_Qwen2-7B-Instruct-abliterated/1762652580.2263439",
- "retrieved_timestamp": "1762652580.226345",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "informatiker/Qwen2-7B-Instruct-abliterated",
- "developer": "informatiker",
- "inference_platform": "unknown",
- "id": "informatiker/Qwen2-7B-Instruct-abliterated"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5821708622011817
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5534265515936739
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.263595166163142
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38879166666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3873005319148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 7.616
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/insightfactory/insightfactory_Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/3986b43c-2752-4a8f-b1e1-c3657734f84b.json b/leaderboard_data/HFOpenLLMv2/insightfactory/insightfactory_Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/3986b43c-2752-4a8f-b1e1-c3657734f84b.json
deleted file mode 100644
index 86498f734c4be21af61375d64d3b05fc33454c5b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/insightfactory/insightfactory_Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/3986b43c-2752-4a8f-b1e1-c3657734f84b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/insightfactory_Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model/1762652580.226581",
- "retrieved_timestamp": "1762652580.226582",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model",
- "developer": "insightfactory",
- "inference_platform": "unknown",
- "id": "insightfactory/Llama-3.2-3B-Instruct-unsloth-bnb-4bitlora_model"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45884807865352817
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4146016381618061
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10498489425981873
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27181208053691275
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.349875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2960438829787234
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "",
- "params_billions": 1.933
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/instruction-pretrain/instruction-pretrain_InstructLM-500M/38ba0438-f5ed-434e-af2e-fed71988f7b9.json b/leaderboard_data/HFOpenLLMv2/instruction-pretrain/instruction-pretrain_InstructLM-500M/38ba0438-f5ed-434e-af2e-fed71988f7b9.json
deleted file mode 100644
index 082ae4f7d2fab32c145ace72fe0d43072cfd3a62..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/instruction-pretrain/instruction-pretrain_InstructLM-500M/38ba0438-f5ed-434e-af2e-fed71988f7b9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/instruction-pretrain_InstructLM-500M/1762652580.226826",
- "retrieved_timestamp": "1762652580.226826",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "instruction-pretrain/InstructLM-500M",
- "developer": "instruction-pretrain",
- "inference_platform": "unknown",
- "id": "instruction-pretrain/InstructLM-500M"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1027662158627996
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29408717872529677
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25671140939597314
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3528229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1141123670212766
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "MistralForCausalLM",
- "params_billions": 0.5
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2-1_8b/fc23ef4f-2ef1-4a3e-b029-9d646145e135.json b/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2-1_8b/fc23ef4f-2ef1-4a3e-b029-9d646145e135.json
deleted file mode 100644
index ed4541f3ec566a5d7d06295f04f9d169f4102254..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2-1_8b/fc23ef4f-2ef1-4a3e-b029-9d646145e135.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/internlm_internlm2-1_8b/1762652580.227062",
- "retrieved_timestamp": "1762652580.227063",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "internlm/internlm2-1_8b",
- "developer": "internlm",
- "inference_platform": "unknown",
- "id": "internlm/internlm2-1_8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2197702097102355
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3879732800028095
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.021148036253776436
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2483221476510067
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38128125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15882646276595744
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "InternLM2ForCausalLM",
- "params_billions": 8.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2-7b/d4bba57d-2a3c-4945-ae47-7830840d0259.json b/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2-7b/d4bba57d-2a3c-4945-ae47-7830840d0259.json
deleted file mode 100644
index 7ff338290abbcae3c707ad20932b38f4588a8852..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2-7b/d4bba57d-2a3c-4945-ae47-7830840d0259.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/internlm_internlm2-7b/1762652580.2273018",
- "retrieved_timestamp": "1762652580.227303",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "internlm/internlm2-7b",
- "developer": "internlm",
- "inference_platform": "unknown",
- "id": "internlm/internlm2-7b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.22803680981595092
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5825
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08571428571428572
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33666666666666667
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43999999999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Unknown",
- "params_billions": 0.0
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2-chat-1_8b/767b5c7e-6319-487f-906c-2abca794f884.json b/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2-chat-1_8b/767b5c7e-6319-487f-906c-2abca794f884.json
deleted file mode 100644
index e0c3df475761d0158147aa2ac38c98eb77d6f50e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2-chat-1_8b/767b5c7e-6319-487f-906c-2abca794f884.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/internlm_internlm2-chat-1_8b/1762652580.227562",
- "retrieved_timestamp": "1762652580.227563",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "internlm/internlm2-chat-1_8b",
- "developer": "internlm",
- "inference_platform": "unknown",
- "id": "internlm/internlm2-chat-1_8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2386545477111841
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4452271664119214
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0324773413897281
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.26593959731543626
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36305208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.18392619680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "InternLM2ForCausalLM",
- "params_billions": 1.889
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2_5-1_8b-chat/d37e87e2-53c3-42fa-b78d-04d2819b14d3.json b/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2_5-1_8b-chat/d37e87e2-53c3-42fa-b78d-04d2819b14d3.json
deleted file mode 100644
index 2c03343fc1acb0f556e0f4a3553c0caaf53c9c0c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2_5-1_8b-chat/d37e87e2-53c3-42fa-b78d-04d2819b14d3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/internlm_internlm2_5-1_8b-chat/1762652580.227762",
- "retrieved_timestamp": "1762652580.227763",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "internlm/internlm2_5-1_8b-chat",
- "developer": "internlm",
- "inference_platform": "unknown",
- "id": "internlm/internlm2_5-1_8b-chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38490870889240547
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4488926786996439
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15861027190332327
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2902684563758389
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35939583333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12990359042553193
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "InternLM2ForCausalLM",
- "params_billions": 1.89
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2_5-20b-chat/a651c814-41e2-4951-bb8f-df799cc6e470.json b/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2_5-20b-chat/a651c814-41e2-4951-bb8f-df799cc6e470.json
deleted file mode 100644
index e2b476f9e8f47ccbc81f3261ea8ac48ce2210bc8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2_5-20b-chat/a651c814-41e2-4951-bb8f-df799cc6e470.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/internlm_internlm2_5-20b-chat/1762652580.2279649",
- "retrieved_timestamp": "1762652580.227966",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "internlm/internlm2_5-20b-chat",
- "developer": "internlm",
- "inference_platform": "unknown",
- "id": "internlm/internlm2_5-20b-chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7009977969565198
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7473580533672884
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4078549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4558229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39976728723404253
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "InternLM2ForCausalLM",
- "params_billions": 19.86
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2_5-7b-chat/28245528-26e8-48a8-9cc8-68d7a6389bde.json b/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2_5-7b-chat/28245528-26e8-48a8-9cc8-68d7a6389bde.json
deleted file mode 100644
index 202718af088336a72421d6220831d3c47cc74eb0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/internlm/internlm_internlm2_5-7b-chat/28245528-26e8-48a8-9cc8-68d7a6389bde.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/internlm_internlm2_5-7b-chat/1762652580.2281651",
- "retrieved_timestamp": "1762652580.2281659",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "internlm/internlm2_5-7b-chat",
- "developer": "internlm",
- "inference_platform": "unknown",
- "id": "internlm/internlm2_5-7b-chat"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5538692890419642
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.7073179916851792
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25302114803625375
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34731543624161076
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45938541666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3776595744680851
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "InternLM2ForCausalLM",
- "params_billions": 7.738
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/intervitens/intervitens_mini-magnum-12b-v1.1/8ad974e6-8d4c-45bf-86d0-f701cfc323d5.json b/leaderboard_data/HFOpenLLMv2/intervitens/intervitens_mini-magnum-12b-v1.1/8ad974e6-8d4c-45bf-86d0-f701cfc323d5.json
deleted file mode 100644
index b5b47b8079b0ae5dbb16a486ad09454bf07a0856..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/intervitens/intervitens_mini-magnum-12b-v1.1/8ad974e6-8d4c-45bf-86d0-f701cfc323d5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/intervitens_mini-magnum-12b-v1.1/1762652580.228364",
- "retrieved_timestamp": "1762652580.228365",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "intervitens/mini-magnum-12b-v1.1",
- "developer": "intervitens",
- "inference_platform": "unknown",
- "id": "intervitens/mini-magnum-12b-v1.1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5155509603407846
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.506180035650624
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.061933534743202415
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.28859060402684567
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4004479166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3291223404255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 12.248
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/inumulaisk/inumulaisk_eval_model/e3e4a9b3-ce68-4999-966e-2ef2baf99266.json b/leaderboard_data/HFOpenLLMv2/inumulaisk/inumulaisk_eval_model/e3e4a9b3-ce68-4999-966e-2ef2baf99266.json
deleted file mode 100644
index 2a1e6577b46dcbe382962ced0853cf443ea4287a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/inumulaisk/inumulaisk_eval_model/e3e4a9b3-ce68-4999-966e-2ef2baf99266.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/inumulaisk_eval_model/1762652580.228598",
- "retrieved_timestamp": "1762652580.228599",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "inumulaisk/eval_model",
- "developer": "inumulaisk",
- "inference_platform": "unknown",
- "id": "inumulaisk/eval_model"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.19314197440568803
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35118774303346373
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.297583081570997
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.27936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35796875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16638962765957446
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "Qwen2ForCausalLM",
- "params_billions": 1.777
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/invalid-coder/invalid-coder_Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/cdb8a900-75f3-4e6b-9d35-5a6791e8acd1.json b/leaderboard_data/HFOpenLLMv2/invalid-coder/invalid-coder_Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/cdb8a900-75f3-4e6b-9d35-5a6791e8acd1.json
deleted file mode 100644
index 91f345b2ae107f4631da1a158520f1a918390340..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/invalid-coder/invalid-coder_Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/cdb8a900-75f3-4e6b-9d35-5a6791e8acd1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/invalid-coder_Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp/1762652580.229043",
- "retrieved_timestamp": "1762652580.229047",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp",
- "developer": "invalid-coder",
- "inference_platform": "unknown",
- "id": "invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45547591501660034
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5158439010792586
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.04909365558912387
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3992395833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3145777925531915
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 10.732
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_EtherealRainbow-v0.2-8B/c60869f0-7009-48c9-be41-339335e5ee4e.json b/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_EtherealRainbow-v0.2-8B/c60869f0-7009-48c9-be41-339335e5ee4e.json
deleted file mode 100644
index 3fdad3af234edded25afcc1ba2d6a0c9120ccc4b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_EtherealRainbow-v0.2-8B/c60869f0-7009-48c9-be41-339335e5ee4e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/invisietch_EtherealRainbow-v0.2-8B/1762652580.229454",
- "retrieved_timestamp": "1762652580.229455",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "invisietch/EtherealRainbow-v0.2-8B",
- "developer": "invisietch",
- "inference_platform": "unknown",
- "id": "invisietch/EtherealRainbow-v0.2-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39032988027323057
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5102035205059678
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0823262839879154
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38267708333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36527593085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_EtherealRainbow-v0.3-8B/cc85ba7f-bbc0-43e7-a678-949fd5be8498.json b/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_EtherealRainbow-v0.3-8B/cc85ba7f-bbc0-43e7-a678-949fd5be8498.json
deleted file mode 100644
index b22061cd3de5d71ebaabff32e075b10b9b66ab41..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_EtherealRainbow-v0.3-8B/cc85ba7f-bbc0-43e7-a678-949fd5be8498.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/invisietch_EtherealRainbow-v0.3-8B/1762652580.229776",
- "retrieved_timestamp": "1762652580.2297769",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "invisietch/EtherealRainbow-v0.3-8B",
- "developer": "invisietch",
- "inference_platform": "unknown",
- "id": "invisietch/EtherealRainbow-v0.3-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36822298168858625
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5096758454539693
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.07628398791540786
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39039583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36261635638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_MiS-Firefly-v0.2-22B/6df8e489-865f-4692-a673-6abbf2159d1d.json b/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_MiS-Firefly-v0.2-22B/6df8e489-865f-4692-a673-6abbf2159d1d.json
deleted file mode 100644
index 4831400584a3713b8082fa34a7cd7cb0c0123936..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_MiS-Firefly-v0.2-22B/6df8e489-865f-4692-a673-6abbf2159d1d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/invisietch_MiS-Firefly-v0.2-22B/1762652580.2300959",
- "retrieved_timestamp": "1762652580.2300968",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "invisietch/MiS-Firefly-v0.2-22B",
- "developer": "invisietch",
- "inference_platform": "unknown",
- "id": "invisietch/MiS-Firefly-v0.2-22B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5371082062261466
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5513523591170696
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.16540785498489427
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46937500000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3620345744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "MistralForCausalLM",
- "params_billions": 22.247
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_Nimbus-Miqu-v0.1-70B/c36d07f4-b263-4849-86f9-d3fea355c068.json b/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_Nimbus-Miqu-v0.1-70B/c36d07f4-b263-4849-86f9-d3fea355c068.json
deleted file mode 100644
index 1a01518cd49c8614fc003a7c7bf6ea431fed9955..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/invisietch/invisietch_Nimbus-Miqu-v0.1-70B/c36d07f4-b263-4849-86f9-d3fea355c068.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/invisietch_Nimbus-Miqu-v0.1-70B/1762652580.230321",
- "retrieved_timestamp": "1762652580.230322",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "invisietch/Nimbus-Miqu-v0.1-70B",
- "developer": "invisietch",
- "inference_platform": "unknown",
- "id": "invisietch/Nimbus-Miqu-v0.1-70B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46466819150963884
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.601030667794844
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.06042296072507553
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3389261744966443
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41331249999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3853058510638298
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 68.977
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaredjoss/jaredjoss_pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/cf6b0824-45c4-4b47-bf23-e5df5673b74e.json b/leaderboard_data/HFOpenLLMv2/jaredjoss/jaredjoss_pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/cf6b0824-45c4-4b47-bf23-e5df5673b74e.json
deleted file mode 100644
index 974070cb2fd71fa22c58c8af72b61f4322c00372..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaredjoss/jaredjoss_pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/cf6b0824-45c4-4b47-bf23-e5df5673b74e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaredjoss_pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model/1762652580.230787",
- "retrieved_timestamp": "1762652580.230787",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model",
- "developer": "jaredjoss",
- "inference_platform": "unknown",
- "id": "jaredjoss/pythia-410m-roberta-lr_8e7-kl_01-steps_12000-rlhf-model"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15722172723928066
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2863444769655102
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.0
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.25922818791946306
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3606979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11685505319148937
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "GPTNeoXForCausalLM",
- "params_billions": 0.407
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2-8B/0064f2f6-672e-478c-9184-e7fd32ad06b8.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2-8B/0064f2f6-672e-478c-9184-e7fd32ad06b8.json
deleted file mode 100644
index 34edb73075b4345a097c0237a132778a40b7af46..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2-8B/0064f2f6-672e-478c-9184-e7fd32ad06b8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2-8B/1762652580.231028",
- "retrieved_timestamp": "1762652580.231029",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Auro-Kosmos-EVAA-v2-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Auro-Kosmos-EVAA-v2-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4778077722664752
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5447163557182707
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14123867069486404
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31543624161073824
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.425
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38580452127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2.1-8B/4381d7ab-d19f-4fa0-a69a-978af28df8fa.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2.1-8B/4381d7ab-d19f-4fa0-a69a-978af28df8fa.json
deleted file mode 100644
index 50437ac8da85ee911d6c7580debe27e75e9b5060..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2.1-8B/4381d7ab-d19f-4fa0-a69a-978af28df8fa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2.1-8B/1762652580.231263",
- "retrieved_timestamp": "1762652580.231264",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Auro-Kosmos-EVAA-v2.1-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Auro-Kosmos-EVAA-v2.1-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4665919759571271
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5444200006474947
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14577039274924472
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4316979166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.382563164893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2.2-8B/4e616fc6-8baa-4c9a-9098-b8d108911ad2.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2.2-8B/4e616fc6-8baa-4c9a-9098-b8d108911ad2.json
deleted file mode 100644
index b3536b2f4f13cbf91db25c1450089866797406fd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2.2-8B/4e616fc6-8baa-4c9a-9098-b8d108911ad2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2.2-8B/1762652580.231466",
- "retrieved_timestamp": "1762652580.231467",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Auro-Kosmos-EVAA-v2.2-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Auro-Kosmos-EVAA-v2.2-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4267997801389203
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5431077158331955
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14123867069486404
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42506249999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37982047872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2.3-8B/9c7ee100-754e-4665-8527-452021a2243b.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2.3-8B/9c7ee100-754e-4665-8527-452021a2243b.json
deleted file mode 100644
index 353deb67195e93b40f39d3babe39ac7e86bce948..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Auro-Kosmos-EVAA-v2.3-8B/9c7ee100-754e-4665-8527-452021a2243b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Auro-Kosmos-EVAA-v2.3-8B/1762652580.231667",
- "retrieved_timestamp": "1762652580.231667",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Auro-Kosmos-EVAA-v2.3-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Auro-Kosmos-EVAA-v2.3-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42712447417297217
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5440818233123913
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13444108761329304
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4277916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37840757978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Aurora_faustus-8B/0563ee22-d981-45cb-83f8-7dbdb2734d10.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Aurora_faustus-8B/0563ee22-d981-45cb-83f8-7dbdb2734d10.json
deleted file mode 100644
index e06375e9d465da6dc55e40779ad0a247f0bd417d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Aurora_faustus-8B/0563ee22-d981-45cb-83f8-7dbdb2734d10.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Aurora_faustus-8B/1762652580.231864",
- "retrieved_timestamp": "1762652580.2318652",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-Aurora_faustus-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-Aurora_faustus-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.443236168920686
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5260325661068855
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11253776435045318
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4116979166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38131648936170215
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-8B/746ffa2c-cc95-4d69-9e46-0e8f4febd440.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-8B/746ffa2c-cc95-4d69-9e46-0e8f4febd440.json
deleted file mode 100644
index 451406eeafdc72529f83097af3cab18849b99787..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-8B/746ffa2c-cc95-4d69-9e46-0e8f4febd440.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-8B/1762652580.232065",
- "retrieved_timestamp": "1762652580.232065",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4404635256674513
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5311831227740652
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11782477341389729
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4236666666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3818151595744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Franken-Immersive-v39-8B/f9e1901a-854d-4437-8d49-a6c47799f687.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Franken-Immersive-v39-8B/f9e1901a-854d-4437-8d49-a6c47799f687.json
deleted file mode 100644
index edddc0881745e58ee246175ae398d7527a3ec849..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Franken-Immersive-v39-8B/f9e1901a-854d-4437-8d49-a6c47799f687.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Franken-Immersive-v39-8B/1762652580.232267",
- "retrieved_timestamp": "1762652580.232268",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-Franken-Immersive-v39-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43779061778303796
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5189720817259138
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12915407854984895
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31543624161073824
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4236354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3900432180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Franken-v38-8B/8919b3ad-529c-4391-bec3-65b81dad97c3.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Franken-v38-8B/8919b3ad-529c-4391-bec3-65b81dad97c3.json
deleted file mode 100644
index 6a72dd8ccf34305b9b0b4b9af9606d584bf23583..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Franken-v38-8B/8919b3ad-529c-4391-bec3-65b81dad97c3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Franken-v38-8B/1762652580.2324722",
- "retrieved_timestamp": "1762652580.2324731",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-Franken-v38-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-Franken-v38-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4355676272290855
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5229513322616746
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12915407854984895
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42115624999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3890458776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Fusion-8B/3030519e-f137-4091-9394-26a0779f0ad9.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Fusion-8B/3030519e-f137-4091-9394-26a0779f0ad9.json
deleted file mode 100644
index cc6f930e4ee141e121840db1e0b82c5748939ca4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Fusion-8B/3030519e-f137-4091-9394-26a0779f0ad9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Fusion-8B/1762652580.2328691",
- "retrieved_timestamp": "1762652580.2328691",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-Fusion-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-Fusion-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43446832183052075
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5419028777027763
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12915407854984895
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42766666666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38538896276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Fusion-8B/ac41e588-0664-44f5-9fa9-eafd6508078b.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Fusion-8B/ac41e588-0664-44f5-9fa9-eafd6508078b.json
deleted file mode 100644
index 50b56b25b8052a472ea28e927a94b95071876e4f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-Fusion-8B/ac41e588-0664-44f5-9fa9-eafd6508078b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-Fusion-8B/1762652580.23267",
- "retrieved_timestamp": "1762652580.232671",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-Fusion-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-Fusion-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4417623018036587
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5405890148943007
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1351963746223565
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42766666666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3859707446808511
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-8B/eb68e0e3-1e39-4779-bc99-4e1825d9c602.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-8B/eb68e0e3-1e39-4779-bc99-4e1825d9c602.json
deleted file mode 100644
index 20fe2b34b78554e690e44c1c2aac53eeca0f295c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-8B/eb68e0e3-1e39-4779-bc99-4e1825d9c602.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-8B/1762652580.233048",
- "retrieved_timestamp": "1762652580.2330492",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-PRP-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-PRP-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.34052092891306174
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5195634214282913
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08836858006042296
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4301145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3646941489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-light-8B/0d2e1c3f-8ee6-44b0-912a-452e2a5a6da7.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-light-8B/0d2e1c3f-8ee6-44b0-912a-452e2a5a6da7.json
deleted file mode 100644
index 542c718bbae9f2aa2c4206c8606fc1e9f19c1bb6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-light-8B/0d2e1c3f-8ee6-44b0-912a-452e2a5a6da7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-light-8B/1762652580.233289",
- "retrieved_timestamp": "1762652580.23329",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-PRP-light-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-PRP-light-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38238651223198894
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5271029575696119
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11027190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42490625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3781582446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v23-8B/5d5ae047-72d1-4083-8e28-dcce7337ed25.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v23-8B/5d5ae047-72d1-4083-8e28-dcce7337ed25.json
deleted file mode 100644
index 0921bdf147d4b3bbb662ad72d63df8c1f8d22be3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v23-8B/5d5ae047-72d1-4083-8e28-dcce7337ed25.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v23-8B/1762652580.233495",
- "retrieved_timestamp": "1762652580.233495",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-PRP-v23-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-PRP-v23-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4040933611705829
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5289840558524612
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11555891238670694
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43684375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37059507978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v24-8B/e6b62da0-ad6d-431c-8a0e-185c6eddf3da.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v24-8B/e6b62da0-ad6d-431c-8a0e-185c6eddf3da.json
deleted file mode 100644
index 0ee959c8cc338b186ff2be0d4cc70f322970248a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v24-8B/e6b62da0-ad6d-431c-8a0e-185c6eddf3da.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v24-8B/1762652580.233697",
- "retrieved_timestamp": "1762652580.2336981",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-PRP-v24-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-PRP-v24-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42587556572117535
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5276140433113651
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11027190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42903125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3779089095744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v25-8B/81c8704c-7124-42d1-b320-77e31e35898b.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v25-8B/81c8704c-7124-42d1-b320-77e31e35898b.json
deleted file mode 100644
index 7b383aefcd890010eb1b9f1b9f5b98b266e77332..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v25-8B/81c8704c-7124-42d1-b320-77e31e35898b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v25-8B/1762652580.23391",
- "retrieved_timestamp": "1762652580.23391",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-PRP-v25-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-PRP-v25-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4420869958377106
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5290702582598797
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11858006042296072
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3179530201342282
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4303333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37159242021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v26-8B/6705072a-5a46-49ae-925f-1cf7da1ea288.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v26-8B/6705072a-5a46-49ae-925f-1cf7da1ea288.json
deleted file mode 100644
index 9302dea4debceb31af5d18b92a6e76f1d3ccd769..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v26-8B/6705072a-5a46-49ae-925f-1cf7da1ea288.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v26-8B/1762652580.234126",
- "retrieved_timestamp": "1762652580.234127",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-PRP-v26-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-PRP-v26-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4413877400851962
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5271171047819411
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11329305135951662
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4263645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3793218085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v27-8B/d3dcd3f0-2f43-4b82-ba29-77a69a9b3e8f.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v27-8B/d3dcd3f0-2f43-4b82-ba29-77a69a9b3e8f.json
deleted file mode 100644
index 337938bd665a4e42d28e2a72e40dddccd9d16a0b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v27-8B/d3dcd3f0-2f43-4b82-ba29-77a69a9b3e8f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v27-8B/1762652580.2343428",
- "retrieved_timestamp": "1762652580.234344",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-PRP-v27-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-PRP-v27-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4378404854674486
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5290320010579407
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11933534743202417
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4343333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37549867021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v28-8B/e2aa230d-452e-42f0-a780-af255c62120e.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v28-8B/e2aa230d-452e-42f0-a780-af255c62120e.json
deleted file mode 100644
index 3631eec91bc9aecdfa45981f4233bb5f1a836b63..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v28-8B/e2aa230d-452e-42f0-a780-af255c62120e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v28-8B/1762652580.234553",
- "retrieved_timestamp": "1762652580.234553",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-PRP-v28-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-PRP-v28-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43659157701565177
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5294743678489208
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11706948640483383
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43296874999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.375
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v29-8B/86e94a19-e497-4539-802b-597ce0e0ced0.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v29-8B/86e94a19-e497-4539-802b-597ce0e0ced0.json
deleted file mode 100644
index df7c8e8b8fbbb10dfbb3638a298022339348698b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v29-8B/86e94a19-e497-4539-802b-597ce0e0ced0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v29-8B/1762652580.234771",
- "retrieved_timestamp": "1762652580.234771",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-PRP-v29-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-PRP-v29-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4487315877427448
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5275189525290296
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12009063444108761
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42366666666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37649601063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v30-8B/320c581d-f667-4dab-a32c-bb9f2621e84d.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v30-8B/320c581d-f667-4dab-a32c-bb9f2621e84d.json
deleted file mode 100644
index 6a5ad8efcfd630a8fd0921f824d7cdc2bbf33fc4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v30-8B/320c581d-f667-4dab-a32c-bb9f2621e84d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v30-8B/1762652580.2349901",
- "retrieved_timestamp": "1762652580.234991",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-PRP-v30-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-PRP-v30-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42947268802333366
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5327819889174134
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11782477341389729
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4263333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3937832446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v31-8B/0757cecd-bc5f-4095-90ee-25920ae6670c.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v31-8B/0757cecd-bc5f-4095-90ee-25920ae6670c.json
deleted file mode 100644
index 51f26550d589f666b5031232cc634319b441476a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v31-8B/0757cecd-bc5f-4095-90ee-25920ae6670c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v31-8B/1762652580.235214",
- "retrieved_timestamp": "1762652580.235214",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-PRP-v31-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-PRP-v31-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43986400528375824
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5315048053167004
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11329305135951662
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42506249999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39345079787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v32-8B/f58f0ecc-a059-448d-a2f9-e36b601e2154.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v32-8B/f58f0ecc-a059-448d-a2f9-e36b601e2154.json
deleted file mode 100644
index a49b52f280517949b17bf57ac939ae3efc8e1a2a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v32-8B/f58f0ecc-a059-448d-a2f9-e36b601e2154.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v32-8B/1762652580.235436",
- "retrieved_timestamp": "1762652580.2354372",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-PRP-v32-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-PRP-v32-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4487315877427448
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5292530349260334
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1148036253776435
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42106249999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3776595744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v33-8B/2436838e-2b6a-4c1e-b8c2-ec505d9a4c34.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v33-8B/2436838e-2b6a-4c1e-b8c2-ec505d9a4c34.json
deleted file mode 100644
index cb329fb5c0be9aa6ba451a94c1107b51eddfe15c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v33-8B/2436838e-2b6a-4c1e-b8c2-ec505d9a4c34.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v33-8B/1762652580.23565",
- "retrieved_timestamp": "1762652580.235651",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-PRP-v33-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-PRP-v33-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4301719437758481
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5321153222507468
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11782477341389729
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41839583333333336
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.390874335106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v34-8B/11486e0e-a9e3-43b0-b26e-299a86555d16.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v34-8B/11486e0e-a9e3-43b0-b26e-299a86555d16.json
deleted file mode 100644
index e40167abec9df18e7f846e72a85f3582817f2664..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-PRP-v34-8B/11486e0e-a9e3-43b0-b26e-299a86555d16.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-PRP-v34-8B/1762652580.235871",
- "retrieved_timestamp": "1762652580.235871",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-PRP-v34-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-PRP-v34-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45625052638111324
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.533301459442271
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11253776435045318
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42372916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3927027925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-8B/75037d12-da94-4c55-8de5-a7cef098d4b0.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-8B/75037d12-da94-4c55-8de5-a7cef098d4b0.json
deleted file mode 100644
index 5ae8689263e0c496de510c725d044bc9ea373ace..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-8B/75037d12-da94-4c55-8de5-a7cef098d4b0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-8B/1762652580.236081",
- "retrieved_timestamp": "1762652580.2360818",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-TSN-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-TSN-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47213726246359655
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5176546480934434
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13444108761329304
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43290625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3816489361702128
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-light-8B/9f0aa20f-8687-4c21-b222-39a322f90842.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-light-8B/9f0aa20f-8687-4c21-b222-39a322f90842.json
deleted file mode 100644
index f0271dcdb260bbfdd2043f6cac9d9946002b04c9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-light-8B/9f0aa20f-8687-4c21-b222-39a322f90842.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-light-8B/1762652580.236298",
- "retrieved_timestamp": "1762652580.236299",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-TSN-light-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-TSN-light-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46849027247702757
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5235021286391058
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1216012084592145
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42893749999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38056848404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v19-8B/91c2897a-3ae3-402b-aadf-26d0b8d746c5.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v19-8B/91c2897a-3ae3-402b-aadf-26d0b8d746c5.json
deleted file mode 100644
index aaf716631c0d5602db6f964d1145076697ab19be..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v19-8B/91c2897a-3ae3-402b-aadf-26d0b8d746c5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v19-8B/1762652580.236516",
- "retrieved_timestamp": "1762652580.2365172",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-TSN-v19-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-TSN-v19-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4563502617499346
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5316458785173577
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11555891238670694
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4276979166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37898936170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v20-8B/4a60fea6-e0e8-497e-9b29-439e7641e77b.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v20-8B/4a60fea6-e0e8-497e-9b29-439e7641e77b.json
deleted file mode 100644
index c2be55d379a6d8afa649278db91108b67a573820..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v20-8B/4a60fea6-e0e8-497e-9b29-439e7641e77b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v20-8B/1762652580.236737",
- "retrieved_timestamp": "1762652580.236737",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-TSN-v20-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-TSN-v20-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4423119545029411
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5250468078369915
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12462235649546828
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42103124999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39361702127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v21-8B/d9c819c2-a3f6-481e-bd71-47912aef9847.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v21-8B/d9c819c2-a3f6-481e-bd71-47912aef9847.json
deleted file mode 100644
index da78a06b0b4ded3b7e7038a15ceb0aadb7fb2511..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v21-8B/d9c819c2-a3f6-481e-bd71-47912aef9847.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v21-8B/1762652580.2369542",
- "retrieved_timestamp": "1762652580.236955",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-TSN-v21-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-TSN-v21-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46701640536000033
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.524796520922724
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11933534743202417
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31208053691275167
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43427083333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3816489361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v22-8B/6e20f902-8752-466c-b8d4-34787fb90fce.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v22-8B/6e20f902-8752-466c-b8d4-34787fb90fce.json
deleted file mode 100644
index 96e1a4ea24c9b0b8fb624ce083f01db96b04ae75..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-TSN-v22-8B/6e20f902-8752-466c-b8d4-34787fb90fce.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-TSN-v22-8B/1762652580.2371762",
- "retrieved_timestamp": "1762652580.2371771",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-TSN-v22-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-TSN-v22-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4673410993940522
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5245863682593667
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11329305135951662
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4303333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38115026595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-8B/d25510e4-6549-4f64-8ec4-37ac8671050c.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-8B/d25510e4-6549-4f64-8ec4-37ac8671050c.json
deleted file mode 100644
index b5033e3b08d1fedba969287bf75674928d215b6f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-8B/d25510e4-6549-4f64-8ec4-37ac8671050c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-8B/1762652580.237391",
- "retrieved_timestamp": "1762652580.237392",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-gamma-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-gamma-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45722460848326885
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5321936191858193
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10498489425981873
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3187919463087248
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4305833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39012632978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-alt-8B/58e279d4-da0f-4e2c-a74d-c51caeaad884.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-alt-8B/58e279d4-da0f-4e2c-a74d-c51caeaad884.json
deleted file mode 100644
index a6c082b084dd5d883fba3c2e32227355d99965b3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-alt-8B/58e279d4-da0f-4e2c-a74d-c51caeaad884.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-alt-8B/1762652580.23761",
- "retrieved_timestamp": "1762652580.23761",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-gamma-alt-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-gamma-alt-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4542270065648036
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5297928701221488
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1095166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32466442953020136
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42921875
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3896276595744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-light-8B/64c07a98-4f3f-49f7-99de-9963dcfedeba.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-light-8B/64c07a98-4f3f-49f7-99de-9963dcfedeba.json
deleted file mode 100644
index 095e335043061fa022b2d1bd0dd20992d4e36b50..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-light-8B/64c07a98-4f3f-49f7-99de-9963dcfedeba.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-light-8B/1762652580.237838",
- "retrieved_timestamp": "1762652580.2378392",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-gamma-light-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-gamma-light-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45809895521660304
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5376138387743472
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11027190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42909375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.394281914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-light-alt-8B/abebffbf-48b5-4452-8c7a-bb1175a7e979.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-light-alt-8B/abebffbf-48b5-4452-8c7a-bb1175a7e979.json
deleted file mode 100644
index 14284eed254af7e6779e09973818894a4b86d41f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-light-alt-8B/abebffbf-48b5-4452-8c7a-bb1175a7e979.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-light-alt-8B/1762652580.238084",
- "retrieved_timestamp": "1762652580.238085",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-gamma-light-alt-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-gamma-light-alt-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44535942410581697
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5327145731870764
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11329305135951662
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43045833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39228723404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-ultra-light-8B/1810feae-7a27-4c17-8174-3cd8a143b21f.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-ultra-light-8B/1810feae-7a27-4c17-8174-3cd8a143b21f.json
deleted file mode 100644
index fc4963cae6649ca46b4621668511b7f7a373790d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-ultra-light-8B/1810feae-7a27-4c17-8174-3cd8a143b21f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-ultra-light-8B/1762652580.238316",
- "retrieved_timestamp": "1762652580.238317",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-gamma-ultra-light-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4563003940655239
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5316344937208096
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11782477341389729
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4196979166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3914561170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v13-8B/1fc6ca13-157c-4502-8724-be153afb6347.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v13-8B/1fc6ca13-157c-4502-8724-be153afb6347.json
deleted file mode 100644
index e6ad25d6ff6f352bf9ef062f2ca114092aba5bce..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v13-8B/1fc6ca13-157c-4502-8724-be153afb6347.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v13-8B/1762652580.238605",
- "retrieved_timestamp": "1762652580.238605",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-gamma-v13-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-gamma-v13-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44286160720222345
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5359422335881335
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11178247734138973
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42776041666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3929521276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v14-8B/c20f5702-24fc-443a-875e-495401776eeb.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v14-8B/c20f5702-24fc-443a-875e-495401776eeb.json
deleted file mode 100644
index 938a402fdfa2c8e514d2b94ef5515d2f5b3ae439..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v14-8B/c20f5702-24fc-443a-875e-495401776eeb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v14-8B/1762652580.23884",
- "retrieved_timestamp": "1762652580.23884",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-gamma-v14-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-gamma-v14-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4380155764482684
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5363063034440413
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11027190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42772916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3931183510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v15-8B/24e11e0c-fb61-46c1-a05e-c533eb392195.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v15-8B/24e11e0c-fb61-46c1-a05e-c533eb392195.json
deleted file mode 100644
index 1578c90b084868f5fee78750786e5434599f8f11..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v15-8B/24e11e0c-fb61-46c1-a05e-c533eb392195.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v15-8B/1762652580.239064",
- "retrieved_timestamp": "1762652580.2390652",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-gamma-v15-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-gamma-v15-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4654428028741517
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.534326872652317
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11102719033232629
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42772916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3941156914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v16-8B/15deaa33-87a2-442e-9618-13f5ab6c299e.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v16-8B/15deaa33-87a2-442e-9618-13f5ab6c299e.json
deleted file mode 100644
index f7e8b818026c25aa3d322cbba5040f03c470e950..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v16-8B/15deaa33-87a2-442e-9618-13f5ab6c299e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v16-8B/1762652580.2392871",
- "retrieved_timestamp": "1762652580.239288",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-gamma-v16-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-gamma-v16-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4556510059974202
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5343925058514598
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11706948640483383
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31543624161073824
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4264270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39170545212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v17-8B/bd4cc259-d535-437a-afc5-d74a60154b07.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v17-8B/bd4cc259-d535-437a-afc5-d74a60154b07.json
deleted file mode 100644
index d7edd8bbb171d6ca28b1a15bbdee1f2239338b2f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v17-8B/bd4cc259-d535-437a-afc5-d74a60154b07.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v17-8B/1762652580.239734",
- "retrieved_timestamp": "1762652580.239739",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-gamma-v17-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-gamma-v17-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4462337708391512
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5346666279815969
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11102719033232629
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42906249999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39228723404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v18-8B/aadb6262-4f31-4681-983c-0d19e8bbc5cd.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v18-8B/aadb6262-4f31-4681-983c-0d19e8bbc5cd.json
deleted file mode 100644
index 0455e03c5fe507235f4ffde056880978a40fd096..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-gamma-v18-8B/aadb6262-4f31-4681-983c-0d19e8bbc5cd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-gamma-v18-8B/1762652580.240138",
- "retrieved_timestamp": "1762652580.240139",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-gamma-v18-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-gamma-v18-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43409376011205825
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5339179190615058
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11102719033232629
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4316979166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3904587765957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-immersive-sof-v44-8B/41e3ecda-8988-456c-b413-19770e2f05c7.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-immersive-sof-v44-8B/41e3ecda-8988-456c-b413-19770e2f05c7.json
deleted file mode 100644
index d25d470f1c7c474a04a787c7851e8aa8624007bd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-immersive-sof-v44-8B/41e3ecda-8988-456c-b413-19770e2f05c7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-immersive-sof-v44-8B/1762652580.2404292",
- "retrieved_timestamp": "1762652580.24043",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-immersive-sof-v44-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44078821970150317
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5214884907801955
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11858006042296072
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4143958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3887965425531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v10-8B/c57d95da-1b6f-4ce7-8c42-f1129fc1e55e.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v10-8B/c57d95da-1b6f-4ce7-8c42-f1129fc1e55e.json
deleted file mode 100644
index f9fd412487e7b424e9f7e6c18dcbd2e89067d190..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v10-8B/c57d95da-1b6f-4ce7-8c42-f1129fc1e55e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v10-8B/1762652580.2406652",
- "retrieved_timestamp": "1762652580.2406662",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-v10-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-v10-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4261503920708165
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5375875314179012
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12462235649546828
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4223645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38314494680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v11-8B/9a6b85d5-bb26-4832-915e-8b1ac90b0793.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v11-8B/9a6b85d5-bb26-4832-915e-8b1ac90b0793.json
deleted file mode 100644
index af34a68ea3351a4dac508a472cee353980554daf..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v11-8B/9a6b85d5-bb26-4832-915e-8b1ac90b0793.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v11-8B/1762652580.240909",
- "retrieved_timestamp": "1762652580.24091",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-v11-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-v11-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44263664853699297
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5359208647512345
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13217522658610273
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31543624161073824
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4184270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3835605053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v12-8B/4bcdbab0-7220-40bb-832f-01003f59da0f.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v12-8B/4bcdbab0-7220-40bb-832f-01003f59da0f.json
deleted file mode 100644
index 53dba018158342a64b70af9e2b3439f0090ea6d4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v12-8B/4bcdbab0-7220-40bb-832f-01003f59da0f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v12-8B/1762652580.2411451",
- "retrieved_timestamp": "1762652580.241146",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-v12-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-v12-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43779061778303796
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5348808250181011
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13670694864048338
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42106249999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3835605053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v2-8B/8f16aed2-8b31-48cc-b874-8d437f26f3db.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v2-8B/8f16aed2-8b31-48cc-b874-8d437f26f3db.json
deleted file mode 100644
index c024ad49ac6f912e551c248f22b70313aa531a4a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v2-8B/8f16aed2-8b31-48cc-b874-8d437f26f3db.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v2-8B/1762652580.241379",
- "retrieved_timestamp": "1762652580.2413802",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-v2-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-v2-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4395891789341171
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5341160060985229
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13217522658610273
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42106249999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3826462765957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v3-8B/262a66ee-04e4-49d5-8bb2-efe0a93801ad.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v3-8B/262a66ee-04e4-49d5-8bb2-efe0a93801ad.json
deleted file mode 100644
index f726072b8cc7f9f4c9e7e6830de3d4b9a1ab600f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v3-8B/262a66ee-04e4-49d5-8bb2-efe0a93801ad.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v3-8B/1762652580.241601",
- "retrieved_timestamp": "1762652580.241602",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-v3-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-v3-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4410630460511443
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5330987974156178
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13293051359516617
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4223958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38214760638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v4-8B/fd2a2a9c-639f-4348-9861-00271ed070b2.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v4-8B/fd2a2a9c-639f-4348-9861-00271ed070b2.json
deleted file mode 100644
index d52230f9c93306ffbf3f96ee132822922d483da6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v4-8B/fd2a2a9c-639f-4348-9861-00271ed070b2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v4-8B/1762652580.241815",
- "retrieved_timestamp": "1762652580.241816",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-v4-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-v4-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4289230353240513
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5336560458316563
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12537764350453173
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41972916666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38173204787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v5-8B/53c89eb1-49ab-4e5f-b1ad-d8e80045a292.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v5-8B/53c89eb1-49ab-4e5f-b1ad-d8e80045a292.json
deleted file mode 100644
index 70cffc61938a91c6515e927b568af49021a664ac..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v5-8B/53c89eb1-49ab-4e5f-b1ad-d8e80045a292.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v5-8B/1762652580.2420359",
- "retrieved_timestamp": "1762652580.2420359",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-v5-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-v5-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44595894448951
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5344958011609363
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12613293051359517
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4223958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3820644946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v6-8B/c0cc1ad5-9e53-45ac-becb-f8ce3e5ac631.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v6-8B/c0cc1ad5-9e53-45ac-becb-f8ce3e5ac631.json
deleted file mode 100644
index f21fb54a4f8b51e874705e8d22abfec7d689ee67..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v6-8B/c0cc1ad5-9e53-45ac-becb-f8ce3e5ac631.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v6-8B/1762652580.242274",
- "retrieved_timestamp": "1762652580.242275",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-v6-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-v6-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4395891789341171
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5379609044843678
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12915407854984895
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4184270833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3820644946808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v7-8B/798c2f08-e10b-4115-bdd5-0d6053d03b60.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v7-8B/798c2f08-e10b-4115-bdd5-0d6053d03b60.json
deleted file mode 100644
index 57bee684b0fae83ec1c9e112a80360dc00f7dbee..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v7-8B/798c2f08-e10b-4115-bdd5-0d6053d03b60.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v7-8B/1762652580.242492",
- "retrieved_timestamp": "1762652580.242493",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-v7-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-v7-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4276741268722545
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5334882804815716
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1336858006042296
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41709375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3835605053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v8-8B/388ef85a-db27-4851-9e6e-2002a75bc6c7.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v8-8B/388ef85a-db27-4851-9e6e-2002a75bc6c7.json
deleted file mode 100644
index 3757ac2f7c2a358605ccbe65e3a31cc9f706cf25..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v8-8B/388ef85a-db27-4851-9e6e-2002a75bc6c7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v8-8B/1762652580.242712",
- "retrieved_timestamp": "1762652580.242713",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-v8-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-v8-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43834027048232027
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5359208647512345
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13066465256797583
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42103124999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38272938829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v9-8B/cd0c4096-93ee-4a04-83b0-44063770e81b.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v9-8B/cd0c4096-93ee-4a04-83b0-44063770e81b.json
deleted file mode 100644
index 9890fd64a8dec33896453895c2f3321a99820cbe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v9-8B/cd0c4096-93ee-4a04-83b0-44063770e81b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v9-8B/1762652580.242934",
- "retrieved_timestamp": "1762652580.242935",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-v9-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-v9-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43686640336529303
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5360680608930435
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12764350453172205
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4183958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3819813829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v9-TitanFusion-Mix-8B/69f3e2b2-8918-41a8-abc6-c84c3d674f94.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v9-TitanFusion-Mix-8B/69f3e2b2-8918-41a8-abc6-c84c3d674f94.json
deleted file mode 100644
index 34b1d850b0803777587d7eddb8e9133da5d51f17..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-EVAA-v9-TitanFusion-Mix-8B/69f3e2b2-8918-41a8-abc6-c84c3d674f94.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-EVAA-v9-TitanFusion-Mix-8B/1762652580.243146",
- "retrieved_timestamp": "1762652580.243147",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-EVAA-v9-TitanFusion-Mix-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.428373382624769
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5539931244833417
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1148036253776435
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43544791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3836436170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-8b/60d775f1-47a9-45ae-9b2f-75b95c9d96cd.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-8b/60d775f1-47a9-45ae-9b2f-75b95c9d96cd.json
deleted file mode 100644
index e45486c4447ed44a38ff917f3b3a0929c7d5ab27..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-8b/60d775f1-47a9-45ae-9b2f-75b95c9d96cd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-8b/1762652580.243371",
- "retrieved_timestamp": "1762652580.243371",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-Elusive-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-Elusive-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41688275996577967
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5338593917060857
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12613293051359517
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4077916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3759973404255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-VENN-8B/d3af54be-9d9a-4a4a-b03e-3468a801795e.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-VENN-8B/d3af54be-9d9a-4a4a-b03e-3468a801795e.json
deleted file mode 100644
index c9dad4af5538793def650e9165370153ef2eb8c2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-VENN-8B/d3af54be-9d9a-4a4a-b03e-3468a801795e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-VENN-8B/1762652580.243592",
- "retrieved_timestamp": "1762652580.243593",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-Elusive-VENN-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-Elusive-VENN-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4232525255211727
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5355598563659026
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12462235649546828
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4156979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3797373670212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-VENN-Asymmetric-8B/e7cf15b2-0347-48a8-bf84-08e27b3688fd.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-VENN-Asymmetric-8B/e7cf15b2-0347-48a8-bf84-08e27b3688fd.json
deleted file mode 100644
index 3e6d0d1e2eb3295aad638b1822186d2f8e65ab8f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-VENN-Asymmetric-8B/e7cf15b2-0347-48a8-bf84-08e27b3688fd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-VENN-Asymmetric-8B/1762652580.243807",
- "retrieved_timestamp": "1762652580.243807",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-Elusive-VENN-Asymmetric-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4541771388803929
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5312976840812583
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13444108761329304
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42506249999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3842253989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-VENN-Aurora_faustus-8B/8befbe9f-3ab2-4bc8-bd16-5badd2291d5d.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-VENN-Aurora_faustus-8B/8befbe9f-3ab2-4bc8-bd16-5badd2291d5d.json
deleted file mode 100644
index 3fef12559b6266b0dc2f9a7ff7eb8a3dd8a6c0ce..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-Elusive-VENN-Aurora_faustus-8B/8befbe9f-3ab2-4bc8-bd16-5badd2291d5d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-Elusive-VENN-Aurora_faustus-8B/1762652580.244045",
- "retrieved_timestamp": "1762652580.244046",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-Elusive-VENN-Aurora_faustus-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4335441074127758
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5303980337010061
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11253776435045318
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.417
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3794880319148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-VENN-8B/e14cedfb-79a9-446a-ba16-64f378a47b4a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-VENN-8B/e14cedfb-79a9-446a-ba16-64f378a47b4a.json
deleted file mode 100644
index b3b63f9ae9045306340b5e18ec1bfa213cf5929f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_Kosmos-VENN-8B/e14cedfb-79a9-446a-ba16-64f378a47b4a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_Kosmos-VENN-8B/1762652580.24428",
- "retrieved_timestamp": "1762652580.244281",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/Kosmos-VENN-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/Kosmos-VENN-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.433219413378724
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5317923607687299
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14123867069486404
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42109375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3800698138297872
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_PRP-Kosmos-EVAA-8B/84a37d06-2668-4143-8e2f-5a08651f2dfb.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_PRP-Kosmos-EVAA-8B/84a37d06-2668-4143-8e2f-5a08651f2dfb.json
deleted file mode 100644
index 3b5059a71748960e6de1e6fcbd4b55f20dd0cc36..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_PRP-Kosmos-EVAA-8B/84a37d06-2668-4143-8e2f-5a08651f2dfb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_PRP-Kosmos-EVAA-8B/1762652580.244709",
- "retrieved_timestamp": "1762652580.24471",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/PRP-Kosmos-EVAA-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/PRP-Kosmos-EVAA-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36327721556580983
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5237421324582278
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09592145015105741
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.425
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3765791223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_PRP-Kosmos-EVAA-light-8B/72c9dcd4-ab00-4f36-a1e6-43e241c8b967.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_PRP-Kosmos-EVAA-light-8B/72c9dcd4-ab00-4f36-a1e6-43e241c8b967.json
deleted file mode 100644
index f8c0c2bffe3b64aa3eebe05695ce69e427372fc1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_PRP-Kosmos-EVAA-light-8B/72c9dcd4-ab00-4f36-a1e6-43e241c8b967.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_PRP-Kosmos-EVAA-light-8B/1762652580.2449658",
- "retrieved_timestamp": "1762652580.244967",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/PRP-Kosmos-EVAA-light-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/PRP-Kosmos-EVAA-light-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4321201079801593
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5274582578494339
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11027190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3221476510067114
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4235416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3631150265957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_TSN-Kosmos-EVAA-8B/9819f2bd-8108-4fc5-9208-ce295d860435.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_TSN-Kosmos-EVAA-8B/9819f2bd-8108-4fc5-9208-ce295d860435.json
deleted file mode 100644
index 757adba05f253f00943aaed65e873229c99d506f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_TSN-Kosmos-EVAA-8B/9819f2bd-8108-4fc5-9208-ce295d860435.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_TSN-Kosmos-EVAA-8B/1762652580.2451851",
- "retrieved_timestamp": "1762652580.245186",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/TSN-Kosmos-EVAA-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/TSN-Kosmos-EVAA-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49032234471203073
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5347376087743225
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14501510574018128
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4173125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.383061835106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_TSN-Kosmos-EVAA-v2-8B/2ce2b8e4-0cd4-4001-8790-ad5e26e3e45c.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_TSN-Kosmos-EVAA-v2-8B/2ce2b8e4-0cd4-4001-8790-ad5e26e3e45c.json
deleted file mode 100644
index 93618df890894c2eb7eca0d14f8338758150746d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_TSN-Kosmos-EVAA-v2-8B/2ce2b8e4-0cd4-4001-8790-ad5e26e3e45c.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_TSN-Kosmos-EVAA-v2-8B/1762652580.2454138",
- "retrieved_timestamp": "1762652580.245415",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/TSN-Kosmos-EVAA-v2-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/TSN-Kosmos-EVAA-v2-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46669171132594844
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.534342097284994
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10800604229607251
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3221476510067114
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41864583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3762466755319149
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-1/b6ca35e1-8680-49e8-a6dd-963214be7411.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-1/b6ca35e1-8680-49e8-a6dd-963214be7411.json
deleted file mode 100644
index dd316fe9d0c8ad1431cbd64d6599b92c61717286..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-1/b6ca35e1-8680-49e8-a6dd-963214be7411.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-1/1762652580.2456498",
- "retrieved_timestamp": "1762652580.245653",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bbb-1",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bbb-1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4864005283758206
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5375556962119087
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13670694864048338
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41706250000000006
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38971077127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-2/155b7412-cc16-45c3-9261-acc9322a0dcc.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-2/155b7412-cc16-45c3-9261-acc9322a0dcc.json
deleted file mode 100644
index 786bf64e9de5e4e7fbc54dfb47b8d40d40646ec4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-2/155b7412-cc16-45c3-9261-acc9322a0dcc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-2/1762652580.2460952",
- "retrieved_timestamp": "1762652580.2460968",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bbb-2",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bbb-2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4077403511571519
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5066789926627318
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11253776435045318
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4144583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.363530585106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-3/94668ddb-d2fb-44e2-8ed7-10179d145366.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-3/94668ddb-d2fb-44e2-8ed7-10179d145366.json
deleted file mode 100644
index d2f7766992adeda3feadce2514fd071c29a49550..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-3/94668ddb-d2fb-44e2-8ed7-10179d145366.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-3/1762652580.24635",
- "retrieved_timestamp": "1762652580.246351",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bbb-3",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bbb-3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.416832892281369
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5157831821186084
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1404833836858006
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.311241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4264895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38563829787234044
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-4/828a6bd0-a205-4327-bc77-2e8a84c0b69e.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-4/828a6bd0-a205-4327-bc77-2e8a84c0b69e.json
deleted file mode 100644
index 807a6673f346f750f1509cbc1c6caacb7bb3b8fd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-4/828a6bd0-a205-4327-bc77-2e8a84c0b69e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-4/1762652580.2465842",
- "retrieved_timestamp": "1762652580.2465851",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bbb-4",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bbb-4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47675833455232114
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.52115051798211
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12764350453172205
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40924999999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3773271276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-5/8c0a66fb-c87d-489d-b071-b4a599562ead.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-5/8c0a66fb-c87d-489d-b071-b4a599562ead.json
deleted file mode 100644
index 7245abd9d8df05ae4d4ba74c438e22751bf0a15a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-5/8c0a66fb-c87d-489d-b071-b4a599562ead.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-5/1762652580.2468202",
- "retrieved_timestamp": "1762652580.2468212",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bbb-5",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bbb-5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4702888336281067
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5206902586604485
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13972809667673716
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30453020134228187
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3998229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3833942819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-6/ef8025de-fe9f-4a79-97f6-c26c18ab049a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-6/ef8025de-fe9f-4a79-97f6-c26c18ab049a.json
deleted file mode 100644
index aa08703012056ae1bcac87fd46d4fa8b4686fe22..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-6/ef8025de-fe9f-4a79-97f6-c26c18ab049a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-6/1762652580.247051",
- "retrieved_timestamp": "1762652580.247051",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bbb-6",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bbb-6"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48797413086166924
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5211453749255449
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13897280966767372
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.40515625000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3871343085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-7/a31fbd82-2e21-40e7-a73a-c6351c80bae7.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-7/a31fbd82-2e21-40e7-a73a-c6351c80bae7.json
deleted file mode 100644
index 1c2bcd4025f48827432767dda3532526e9abb895..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bbb-7/a31fbd82-2e21-40e7-a73a-c6351c80bae7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bbb-7/1762652580.2473001",
- "retrieved_timestamp": "1762652580.247304",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bbb-7",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bbb-7"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48280340607366234
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5211089947725771
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13670694864048338
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4038229166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3859707446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-1/15ec7997-1333-43c6-869a-ce4589af56d1.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-1/15ec7997-1333-43c6-869a-ce4589af56d1.json
deleted file mode 100644
index f27ac48084243aee73f049ac0c57c270677c1c84..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-1/15ec7997-1333-43c6-869a-ce4589af56d1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-1/1762652580.2475939",
- "retrieved_timestamp": "1762652580.247595",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-1",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-1"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42842325030917966
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5890155164168736
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.05362537764350453
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4441041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3449135638297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-10/86411dbb-e28b-4e9d-856e-fcc001252fbe.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-10/86411dbb-e28b-4e9d-856e-fcc001252fbe.json
deleted file mode 100644
index 8867badf1dc87b7b74529b8cce9e5581a07a7e12..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-10/86411dbb-e28b-4e9d-856e-fcc001252fbe.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-10/1762652580.247846",
- "retrieved_timestamp": "1762652580.2478468",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-10",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-10"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46184568057199343
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5856025427339699
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11027190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41985416666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37076130319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-11/804f4be8-a8a9-473f-a898-d71b742a62eb.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-11/804f4be8-a8a9-473f-a898-d71b742a62eb.json
deleted file mode 100644
index aea58f2124a4f9ac1732a5b6eebc3154fa203fe4..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-11/804f4be8-a8a9-473f-a898-d71b742a62eb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-11/1762652580.2481",
- "retrieved_timestamp": "1762652580.2481012",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-11",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-11"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45754930251732073
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5851155912628809
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11782477341389729
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4145520833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3738364361702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-12/736ee66e-bd19-4275-afaf-73c2112c2fbd.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-12/736ee66e-bd19-4275-afaf-73c2112c2fbd.json
deleted file mode 100644
index 165d0a1a8261b5b75704c982816e1f6f35fe71d7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-12/736ee66e-bd19-4275-afaf-73c2112c2fbd.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-12/1762652580.248367",
- "retrieved_timestamp": "1762652580.248368",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-12",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-12"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47338617091539337
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5802489392471556
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11858006042296072
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4144895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37367021276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-13/da5a3c32-371f-44e5-89a7-c9ba6e98664e.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-13/da5a3c32-371f-44e5-89a7-c9ba6e98664e.json
deleted file mode 100644
index 2b65c2a3b090aa859ab0d576ece775313ce12d98..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-13/da5a3c32-371f-44e5-89a7-c9ba6e98664e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-13/1762652580.248588",
- "retrieved_timestamp": "1762652580.248588",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-13",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-13"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4697890486132351
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5777886799254942
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11253776435045318
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41585416666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37300531914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-15/af3bd92d-45f5-4a48-89aa-b8c956209d5a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-15/af3bd92d-45f5-4a48-89aa-b8c956209d5a.json
deleted file mode 100644
index 05607539ab60e285f95704a581bd273a237326ce..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-15/af3bd92d-45f5-4a48-89aa-b8c956209d5a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-15/1762652580.248791",
- "retrieved_timestamp": "1762652580.2487922",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-15",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-15"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47453534399836883
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5818643001829722
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12462235649546828
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4105208333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37666223404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-16/c98928d3-0d7f-429c-927c-bf8fa432101a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-16/c98928d3-0d7f-429c-927c-bf8fa432101a.json
deleted file mode 100644
index be97e7cb59a8ffacb8db51eb4e8187acad70048d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-16/c98928d3-0d7f-429c-927c-bf8fa432101a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-16/1762652580.2489972",
- "retrieved_timestamp": "1762652580.248998",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-16",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-16"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4730614768813415
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5783335636603978
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11933534743202417
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4158541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37757646276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-17/787d8040-25c8-4893-b140-cf041260d767.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-17/787d8040-25c8-4893-b140-cf041260d767.json
deleted file mode 100644
index 1a41644f39e0a9a838f6797e271ead0b31eb0a8b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-17/787d8040-25c8-4893-b140-cf041260d767.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-17/1762652580.249204",
- "retrieved_timestamp": "1762652580.2492049",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-17",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-17"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4721871301480073
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5776302177859685
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11329305135951662
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41582291666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37566489361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-18/6aad7ade-7bd0-4515-b4ac-2299c58da098.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-18/6aad7ade-7bd0-4515-b4ac-2299c58da098.json
deleted file mode 100644
index 0ccf4a78410087cceb1835187efbd4ebed296219..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-18/6aad7ade-7bd0-4515-b4ac-2299c58da098.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-18/1762652580.249514",
- "retrieved_timestamp": "1762652580.249515",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-18",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-18"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47246195649764844
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5823837707078298
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11858006042296072
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4184895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37566489361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-19/81914fd7-1410-4b80-9be9-6ebfbb664613.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-19/81914fd7-1410-4b80-9be9-6ebfbb664613.json
deleted file mode 100644
index b85692f950e4b70aaca7728d46c268695b69279c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-19/81914fd7-1410-4b80-9be9-6ebfbb664613.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-19/1762652580.249828",
- "retrieved_timestamp": "1762652580.249829",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-19",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-19"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45842364925065493
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5765774285787187
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11933534743202417
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.417125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3774933510638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-2/3e4b8dcc-9270-4b14-952f-c6b96ee8ce57.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-2/3e4b8dcc-9270-4b14-952f-c6b96ee8ce57.json
deleted file mode 100644
index e2e90f5bd9eae2f3fadac0592bcbfd94bfd0b8f8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-2/3e4b8dcc-9270-4b14-952f-c6b96ee8ce57.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-2/1762652580.250077",
- "retrieved_timestamp": "1762652580.250078",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-2",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-2"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45792386423578324
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5937358907182445
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1027190332326284
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41864583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3695146276595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-20/cfe4ab09-c772-4617-88b6-77e49553605b.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-20/cfe4ab09-c772-4617-88b6-77e49553605b.json
deleted file mode 100644
index 8d19b38f31fe53256438f6f3bc814b321fde45d9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-20/cfe4ab09-c772-4617-88b6-77e49553605b.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-20/1762652580.2503",
- "retrieved_timestamp": "1762652580.2503",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-20",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-20"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4727367828472896
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.574973333640619
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12009063444108761
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.287751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4105208333333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3768284574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-21/a369ff4f-7fe9-4764-be74-83563dbaf635.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-21/a369ff4f-7fe9-4764-be74-83563dbaf635.json
deleted file mode 100644
index b5e7c12b59330d1231ff7fcd93d3157707e92672..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-21/a369ff4f-7fe9-4764-be74-83563dbaf635.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-21/1762652580.25052",
- "retrieved_timestamp": "1762652580.2505212",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-21",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-21"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47001400727846554
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5738369241857685
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1216012084592145
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4157916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37757646276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-22/f3815ff9-c1bd-4706-a770-4c0b0e8c5d13.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-22/f3815ff9-c1bd-4706-a770-4c0b0e8c5d13.json
deleted file mode 100644
index 46623a261013ba54c825a336858b00aebbc68c1a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-22/f3815ff9-c1bd-4706-a770-4c0b0e8c5d13.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-22/1762652580.250869",
- "retrieved_timestamp": "1762652580.25087",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-22",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-22"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45999725173650363
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.579296884452635
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11858006042296072
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41715625000000006
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3764128989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-23/f4db95ae-8e3d-45ed-9c53-3b30fde0cb3e.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-23/f4db95ae-8e3d-45ed-9c53-3b30fde0cb3e.json
deleted file mode 100644
index 0804217926b265ef06ca1310af38e9b485bdc74f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-23/f4db95ae-8e3d-45ed-9c53-3b30fde0cb3e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-23/1762652580.2511601",
- "retrieved_timestamp": "1762652580.251161",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-23",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-23"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46576749690820357
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.570027700842045
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12009063444108761
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4197291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37957114361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-24/0b27b829-6588-4f7b-80fe-6e6767287a38.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-24/0b27b829-6588-4f7b-80fe-6e6767287a38.json
deleted file mode 100644
index cf5fba37d34a5ba7efb24f09946c2b12167df332..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-24/0b27b829-6588-4f7b-80fe-6e6767287a38.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-24/1762652580.251392",
- "retrieved_timestamp": "1762652580.251392",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-24",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-24"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4715377420799035
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5716684749879075
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1268882175226586
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4157604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38090093085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-25/a0c16d3d-e3f2-4c50-975a-70b69824b3d5.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-25/a0c16d3d-e3f2-4c50-975a-70b69824b3d5.json
deleted file mode 100644
index 355db32fe83ce74a1bbbc5198344efa2bf78664d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-25/a0c16d3d-e3f2-4c50-975a-70b69824b3d5.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-25/1762652580.251633",
- "retrieved_timestamp": "1762652580.251633",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-25",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-25"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47518473206647255
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5705628020556314
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11329305135951662
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2911073825503356
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4117916666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37824135638297873
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-26/0218b7de-bbd7-4196-8fec-3f6fb790a3c1.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-26/0218b7de-bbd7-4196-8fec-3f6fb790a3c1.json
deleted file mode 100644
index 9ab8a4a072fb3ce72be6ffa4ac22dcf1b09705d9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-26/0218b7de-bbd7-4196-8fec-3f6fb790a3c1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-26/1762652580.251851",
- "retrieved_timestamp": "1762652580.251852",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-26",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-26"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4690897928607206
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5734958656360526
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1163141993957704
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4276979166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3771609042553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-27/68435a43-944b-4c66-979b-eb48f7a8e77a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-27/68435a43-944b-4c66-979b-eb48f7a8e77a.json
deleted file mode 100644
index 5ef6ecb91871e777f1a4cfc56c677387fe4eb318..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-27/68435a43-944b-4c66-979b-eb48f7a8e77a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-27/1762652580.2520802",
- "retrieved_timestamp": "1762652580.252081",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-27",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-27"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4818791916559174
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.571405917910282
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12764350453172205
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.409125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3799035904255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-28/0dc95982-e5b0-4011-9e5b-48af7e3048f0.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-28/0dc95982-e5b0-4011-9e5b-48af7e3048f0.json
deleted file mode 100644
index 3050d6e3ca49818ff0c1468dfa91f85a239b84e8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-28/0dc95982-e5b0-4011-9e5b-48af7e3048f0.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-28/1762652580.252297",
- "retrieved_timestamp": "1762652580.2522979",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-28",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-28"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4785070280189896
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5702617832390487
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12311178247734139
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.413125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3812333776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-29/012eeeed-c556-460d-82f6-34bdc31da5cf.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-29/012eeeed-c556-460d-82f6-34bdc31da5cf.json
deleted file mode 100644
index 2b6817e50f5554237bd1a59c1b38c9f89e40546f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-29/012eeeed-c556-460d-82f6-34bdc31da5cf.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-29/1762652580.252519",
- "retrieved_timestamp": "1762652580.2525198",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-29",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-29"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46881496651107946
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5670161357895335
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4236979166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38189827127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-3/37e59290-b4ea-4a44-bfb0-cdbe781c4d7f.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-3/37e59290-b4ea-4a44-bfb0-cdbe781c4d7f.json
deleted file mode 100644
index 94d8d7c62905a55738ca8fa528b63d217f97c67f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-3/37e59290-b4ea-4a44-bfb0-cdbe781c4d7f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-3/1762652580.2527301",
- "retrieved_timestamp": "1762652580.2527308",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-3",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-3"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4663670172918966
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5890722855221537
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1148036253776435
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41728125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37017952127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-30/6d3a64df-5ebb-4cd8-bd6c-de799d185fe1.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-30/6d3a64df-5ebb-4cd8-bd6c-de799d185fe1.json
deleted file mode 100644
index 3525e43b4d79672b03c2439b174b7b8293dbbf16..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-30/6d3a64df-5ebb-4cd8-bd6c-de799d185fe1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-30/1762652580.252943",
- "retrieved_timestamp": "1762652580.2529438",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-30",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-30"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46664184364153777
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5705838505746653
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12311178247734139
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4144270833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3781582446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-31/a637936e-646b-4c21-964a-61e253fd3705.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-31/a637936e-646b-4c21-964a-61e253fd3705.json
deleted file mode 100644
index b4745b2904324fa2b1b2ac65f301eabba14f66e6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-31/a637936e-646b-4c21-964a-61e253fd3705.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-31/1762652580.253162",
- "retrieved_timestamp": "1762652580.253163",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-31",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-31"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4727367828472896
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5665082303171874
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1283987915407855
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4104270833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3819813829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-32/a56c62cc-c318-4de4-b6c7-0fa10229a127.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-32/a56c62cc-c318-4de4-b6c7-0fa10229a127.json
deleted file mode 100644
index 5bb64bb99c85880d28de553a259b954bb585528a..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-32/a56c62cc-c318-4de4-b6c7-0fa10229a127.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-32/1762652580.253373",
- "retrieved_timestamp": "1762652580.2533739",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-32",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-32"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4635943740386619
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5662056335064284
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12462235649546828
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29697986577181207
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4157291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3812333776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-33/bcab8546-ea69-4207-b69b-ab982b603e55.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-33/bcab8546-ea69-4207-b69b-ab982b603e55.json
deleted file mode 100644
index 7dcf2f445fda1d12dca55e2d2c639321934f53c3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-33/bcab8546-ea69-4207-b69b-ab982b603e55.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-33/1762652580.25359",
- "retrieved_timestamp": "1762652580.253591",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-33",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-33"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4685401401614383
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5652966799156172
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11782477341389729
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4156979166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38081781914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-34/6097086b-8c8b-493e-af1a-71146a2ed566.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-34/6097086b-8c8b-493e-af1a-71146a2ed566.json
deleted file mode 100644
index 365e7bdc660bd5010d818e2c167e2c0561f31bfe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-34/6097086b-8c8b-493e-af1a-71146a2ed566.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-34/1762652580.253809",
- "retrieved_timestamp": "1762652580.25381",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-34",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-34"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4623953332712758
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5681235912530039
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4184583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38040226063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-35/7166192e-42b0-4990-8218-88bb38fd1bdb.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-35/7166192e-42b0-4990-8218-88bb38fd1bdb.json
deleted file mode 100644
index f54e357ef51c95435590be0d1b233719a715ca32..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-35/7166192e-42b0-4990-8218-88bb38fd1bdb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-35/1762652580.2540212",
- "retrieved_timestamp": "1762652580.254022",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-35",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-35"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47213726246359655
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5639648300586834
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12462235649546828
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2953020134228188
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41830208333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3829787234042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-36/3a4f8c97-9f30-44b8-8f79-7f19f90a08d1.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-36/3a4f8c97-9f30-44b8-8f79-7f19f90a08d1.json
deleted file mode 100644
index 556b75506bfad31ca063fba25246cdcaab854877..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-36/3a4f8c97-9f30-44b8-8f79-7f19f90a08d1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-36/1762652580.2542279",
- "retrieved_timestamp": "1762652580.254229",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-36",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-36"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4665919759571271
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5664445599052024
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12386706948640483
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4196354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.383061835106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-37/19490f78-486d-4325-b31e-af8555c32ea9.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-37/19490f78-486d-4325-b31e-af8555c32ea9.json
deleted file mode 100644
index 283c19ddd0e0e3d6c9266b7782a70ba158a556b1..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-37/19490f78-486d-4325-b31e-af8555c32ea9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-37/1762652580.2544441",
- "retrieved_timestamp": "1762652580.254445",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-37",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-37"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48797413086166924
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.562488460737535
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1216012084592145
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4156354166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3828125
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-38/61e7c49e-abb9-4e38-ba3f-1018db104d83.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-38/61e7c49e-abb9-4e38-ba3f-1018db104d83.json
deleted file mode 100644
index 46a5c321bfc3d60bf3a166ca8262034b0213c864..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-38/61e7c49e-abb9-4e38-ba3f-1018db104d83.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-38/1762652580.2548852",
- "retrieved_timestamp": "1762652580.2548869",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-38",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-38"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46179581288758276
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5658176339168742
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12386706948640483
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2978187919463087
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4117291666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3810671542553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-39/243e6b7b-a34f-44cd-b027-176f877ff8e7.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-39/243e6b7b-a34f-44cd-b027-176f877ff8e7.json
deleted file mode 100644
index 64945462b2e491db1a861ad18181a0542900f839..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-39/243e6b7b-a34f-44cd-b027-176f877ff8e7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-39/1762652580.2552152",
- "retrieved_timestamp": "1762652580.2552161",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-39",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-39"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45759917020173135
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5633012248625926
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12537764350453173
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4262395833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38314494680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-4/85ba493b-05f1-4853-a0ff-44570a7c2a82.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-4/85ba493b-05f1-4853-a0ff-44570a7c2a82.json
deleted file mode 100644
index 68408fe157f9c2039dbfafe1eb5b9e86029350ff..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-4/85ba493b-05f1-4853-a0ff-44570a7c2a82.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-4/1762652580.2554429",
- "retrieved_timestamp": "1762652580.255444",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-4",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-4"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4672912317096415
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5892000111391051
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1095166163141994
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41728125
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3705119680851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-40/56837896-11a6-458b-a17e-9540ab5ae66a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-40/56837896-11a6-458b-a17e-9540ab5ae66a.json
deleted file mode 100644
index efcca4c37c4957e40a9c02f0260291cc87a368a0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-40/56837896-11a6-458b-a17e-9540ab5ae66a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-40/1762652580.2556531",
- "retrieved_timestamp": "1762652580.2556539",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-40",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-40"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45357761849669986
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5633956317971519
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12462235649546828
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2986577181208054
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4236041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38347739361702127
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-41/db0c4182-7391-40e7-ad6e-5374c8eb28e1.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-41/db0c4182-7391-40e7-ad6e-5374c8eb28e1.json
deleted file mode 100644
index 4d0c9274fa2bf2e3cb0668db36eb72165745a8a3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-41/db0c4182-7391-40e7-ad6e-5374c8eb28e1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-41/1762652580.2558541",
- "retrieved_timestamp": "1762652580.2558541",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-41",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-41"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4739856912990864
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.56138466485423
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12537764350453173
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41827083333333337
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38248005319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-42/265e3cbb-484f-4cf7-8994-050f414ecf37.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-42/265e3cbb-484f-4cf7-8994-050f414ecf37.json
deleted file mode 100644
index d9b5b051020ebafa7c0ba1644b4792b6b9e02777..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-42/265e3cbb-484f-4cf7-8994-050f414ecf37.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-42/1762652580.25606",
- "retrieved_timestamp": "1762652580.2560608",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-42",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-42"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4660423232578447
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5645607204696422
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1268882175226586
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42100000000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3812333776595745
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-43/472b725a-2bd5-440a-9768-ba8db6fe6b34.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-43/472b725a-2bd5-440a-9768-ba8db6fe6b34.json
deleted file mode 100644
index 7e6657ac73eceb18070b9814dfaba4acf09c8afa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-43/472b725a-2bd5-440a-9768-ba8db6fe6b34.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-43/1762652580.2562718",
- "retrieved_timestamp": "1762652580.2562718",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-43",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-43"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45999725173650363
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5635240412618795
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12386706948640483
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4156041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3819813829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-44/60c18178-ff40-4e9d-9683-077cc2fa254e.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-44/60c18178-ff40-4e9d-9683-077cc2fa254e.json
deleted file mode 100644
index 93baeba8e3cf95e72861220647fb052436e69d92..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-44/60c18178-ff40-4e9d-9683-077cc2fa254e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-44/1762652580.2565289",
- "retrieved_timestamp": "1762652580.2565298",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-44",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-44"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4706135276621586
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5642775941837409
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1216012084592145
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42487500000000006
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3833942819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-46/6b3c3872-cd4d-4827-8651-6baa9d2423e7.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-46/6b3c3872-cd4d-4827-8651-6baa9d2423e7.json
deleted file mode 100644
index 97f840046b868b74ce113d2cd38c3c24b849e5e0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-46/6b3c3872-cd4d-4827-8651-6baa9d2423e7.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-46/1762652580.2567308",
- "retrieved_timestamp": "1762652580.256732",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-46",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-46"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4727367828472896
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5631697539272891
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12764350453172205
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2961409395973154
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4262395833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3822307180851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-47/9f30c4d4-4a3c-459e-8444-e143ef75f84e.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-47/9f30c4d4-4a3c-459e-8444-e143ef75f84e.json
deleted file mode 100644
index 1f8b48b63f1ea468bcd73942f1802834dd1fb955..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-47/9f30c4d4-4a3c-459e-8444-e143ef75f84e.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-47/1762652580.256935",
- "retrieved_timestamp": "1762652580.2569358",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-47",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-47"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46516797652451053
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5545716016743777
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12764350453172205
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4156041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3854720744680851
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-48/80bbd567-b13e-4ed4-ba85-9098639a3642.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-48/80bbd567-b13e-4ed4-ba85-9098639a3642.json
deleted file mode 100644
index 75e6f76f2f6c9e80bd0345cfd072c6726d900373..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-48/80bbd567-b13e-4ed4-ba85-9098639a3642.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-48/1762652580.257132",
- "retrieved_timestamp": "1762652580.257133",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-48",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-48"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46881496651107946
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5541308128775738
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12537764350453173
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4209375
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3859707446808511
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-49/e574e35a-56cb-471d-b4f1-df0858f5ce66.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-49/e574e35a-56cb-471d-b4f1-df0858f5ce66.json
deleted file mode 100644
index cd1b1654604c7dcf29a86ac3ee500b10c02871cd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-49/e574e35a-56cb-471d-b4f1-df0858f5ce66.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-49/1762652580.257362",
- "retrieved_timestamp": "1762652580.257366",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-49",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-49"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47246195649764844
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5540285004706683
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12009063444108761
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41290625000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38081781914893614
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-5/ec314c97-9bc0-4e14-9d57-d6204e699428.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-5/ec314c97-9bc0-4e14-9d57-d6204e699428.json
deleted file mode 100644
index 67cada033819679c5ca9b4b13fe60c9cfc4f3652..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-5/ec314c97-9bc0-4e14-9d57-d6204e699428.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-5/1762652580.2577002",
- "retrieved_timestamp": "1762652580.2577012",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-5",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-5"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.46516797652451053
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5881569099353959
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10574018126888217
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4186145833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37017952127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-50/980887dd-2948-4e5f-b22c-3cc03057f493.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-50/980887dd-2948-4e5f-b22c-3cc03057f493.json
deleted file mode 100644
index da25aafae4f82a3f519abd241a43fd10e0f7488e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-50/980887dd-2948-4e5f-b22c-3cc03057f493.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-50/1762652580.257925",
- "retrieved_timestamp": "1762652580.257926",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-50",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-50"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47246195649764844
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.555294802866646
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12084592145015106
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.2936241610738255
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41687500000000005
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3842253989361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-51/6d544c96-53c9-43d1-9cb1-6077d7235fff.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-51/6d544c96-53c9-43d1-9cb1-6077d7235fff.json
deleted file mode 100644
index 6d089439fcedcebf86bdaaed916cfe546a489f67..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-51/6d544c96-53c9-43d1-9cb1-6077d7235fff.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-51/1762652580.2581341",
- "retrieved_timestamp": "1762652580.258135",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-51",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-51"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4630447213393795
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5557101784534039
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12386706948640483
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41681250000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38314494680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-52/fd3c9666-09bf-4562-b49d-eea905469761.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-52/fd3c9666-09bf-4562-b49d-eea905469761.json
deleted file mode 100644
index 38dbecaca4729a6de20ddfd8bd62be918692afdc..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-52/fd3c9666-09bf-4562-b49d-eea905469761.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-52/1762652580.258348",
- "retrieved_timestamp": "1762652580.258349",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-52",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-52"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45362748618111054
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.544409095161705
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12009063444108761
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29194630872483224
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41690625000000003
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38430851063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-53/978d4a27-17c7-4f87-b3e5-27b00ffa4d80.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-53/978d4a27-17c7-4f87-b3e5-27b00ffa4d80.json
deleted file mode 100644
index 67f420e213877de2d0d653d9639f243d4d496172..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-53/978d4a27-17c7-4f87-b3e5-27b00ffa4d80.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-53/1762652580.25855",
- "retrieved_timestamp": "1762652580.2585511",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-53",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-53"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4779573753197073
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5494367702137035
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1268882175226586
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29865771812080544
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4196041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38580452127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-54/9a2d7235-84cf-43f6-8855-68d0bf85e6e3.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-54/9a2d7235-84cf-43f6-8855-68d0bf85e6e3.json
deleted file mode 100644
index 795a34aea3b7e8ccc207d589b97d926d9125878c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-54/9a2d7235-84cf-43f6-8855-68d0bf85e6e3.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-54/1762652580.258788",
- "retrieved_timestamp": "1762652580.258792",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-54",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-54"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48405231452545916
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5547738488653888
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12915407854984895
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29446308724832215
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4155416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38248005319148937
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-55/7c388cc5-fb2f-48ba-967c-a931fcb25a42.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-55/7c388cc5-fb2f-48ba-967c-a931fcb25a42.json
deleted file mode 100644
index d2c0d8478e78d3c5616facffd734bca182c63e1c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-55/7c388cc5-fb2f-48ba-967c-a931fcb25a42.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-55/1762652580.259115",
- "retrieved_timestamp": "1762652580.259116",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-55",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-55"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47093822169621047
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5549641462109072
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1283987915407855
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42220833333333335
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3846409574468085
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-56/348c8f2b-807f-464b-832e-0049f8329b86.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-56/348c8f2b-807f-464b-832e-0049f8329b86.json
deleted file mode 100644
index f24b442cb8884e0ac5454c8311486b52cad69c8e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-56/348c8f2b-807f-464b-832e-0049f8329b86.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-56/1762652580.2593641",
- "retrieved_timestamp": "1762652580.259365",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-56",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-56"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45999725173650363
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5446903231355648
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12311178247734139
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30033557046979864
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4116041666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3843916223404255
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-57/fab7388c-87ed-4108-ba4d-e1621925f264.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-57/fab7388c-87ed-4108-ba4d-e1621925f264.json
deleted file mode 100644
index 7a9352ac7398212308b7014eaa15dae87e8cc754..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-57/fab7388c-87ed-4108-ba4d-e1621925f264.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-57/1762652580.259624",
- "retrieved_timestamp": "1762652580.259625",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-57",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-57"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44051339335186196
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5424621834237494
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12613293051359517
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42103124999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3896276595744681
- }
- }
- ],
- "additional_details": {
- "precision": "float16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-58/a9c1b649-8850-43d1-b5db-feefd0b8d0b4.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-58/a9c1b649-8850-43d1-b5db-feefd0b8d0b4.json
deleted file mode 100644
index 1b4ed78771a9f54a91bf0aff6afd1f41e5ed8f99..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-58/a9c1b649-8850-43d1-b5db-feefd0b8d0b4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-58/1762652580.259867",
- "retrieved_timestamp": "1762652580.259868",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-58",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-58"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4630447213393795
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5446322106157867
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13217522658610273
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4183333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3896276595744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-59/974b1542-8716-4ea3-b097-f9893c9c9656.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-59/974b1542-8716-4ea3-b097-f9893c9c9656.json
deleted file mode 100644
index 22679a582820d2c0e0fc3c3b2cb28eb54bcd475b..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-59/974b1542-8716-4ea3-b097-f9893c9c9656.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-59/1762652580.260088",
- "retrieved_timestamp": "1762652580.2600892",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-59",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-59"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43414362779646887
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5511531646170439
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1540785498489426
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31543624161073824
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41700000000000004
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3838098404255319
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-6/e8dfd77c-e2c8-42ef-b341-5476411d038d.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-6/e8dfd77c-e2c8-42ef-b341-5476411d038d.json
deleted file mode 100644
index b3acf6dcbb01abbb8e68e3637781d3b47e5bfae9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-6/e8dfd77c-e2c8-42ef-b341-5476411d038d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-6/1762652580.260308",
- "retrieved_timestamp": "1762652580.260309",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-6",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-6"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4620706392372239
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5890658635262072
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10876132930513595
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41991666666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36976396276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-60/16d14b95-fe8b-4e1f-94e1-65d966ba24d6.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-60/16d14b95-fe8b-4e1f-94e1-65d966ba24d6.json
deleted file mode 100644
index 2240aec4cc29d38b82cca12b7b0e34e9cc2308d7..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-60/16d14b95-fe8b-4e1f-94e1-65d966ba24d6.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-60/1762652580.2605288",
- "retrieved_timestamp": "1762652580.2605288",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-60",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-60"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42070484093316846
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5368509826419269
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1578549848942598
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32550335570469796
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42890625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3689328457446808
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-61/00b1b367-c4eb-4048-b80d-a8253e7c2048.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-61/00b1b367-c4eb-4048-b80d-a8253e7c2048.json
deleted file mode 100644
index b7d485d9cadad1696b3e47bee7b66c1eabcefc2c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-61/00b1b367-c4eb-4048-b80d-a8253e7c2048.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-61/1762652580.260743",
- "retrieved_timestamp": "1762652580.260743",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-61",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-61"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42467652495378927
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5271029876122725
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.17069486404833836
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3187919463087248
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4355729166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3679355053191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-62/85bd08bf-bdc3-42fb-b8f9-3d83e32921bc.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-62/85bd08bf-bdc3-42fb-b8f9-3d83e32921bc.json
deleted file mode 100644
index fa9c30990860556b24cab0d98db985db08b82ca3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-62/85bd08bf-bdc3-42fb-b8f9-3d83e32921bc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-62/1762652580.260948",
- "retrieved_timestamp": "1762652580.260949",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-62",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-62"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41498446344587914
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5379352222621877
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1623867069486405
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42890625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3719248670212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-63/c9df2e30-5e2d-42cc-8597-dc354602350a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-63/c9df2e30-5e2d-42cc-8597-dc354602350a.json
deleted file mode 100644
index 59deb5c66a70b677e7ac23593759d8d824dbb765..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-63/c9df2e30-5e2d-42cc-8597-dc354602350a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-63/1762652580.261157",
- "retrieved_timestamp": "1762652580.261157",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-63",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-63"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43077146415954115
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49171126396743653
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11102719033232629
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4312604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3248005319148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-64/90830134-43d5-4d0c-9a93-4be2c1c7dba8.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-64/90830134-43d5-4d0c-9a93-4be2c1c7dba8.json
deleted file mode 100644
index 6705ea7f644def317565b497caf9aba3a162cffa..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-64/90830134-43d5-4d0c-9a93-4be2c1c7dba8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-64/1762652580.261374",
- "retrieved_timestamp": "1762652580.261375",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-64",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-64"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41401038134372353
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5359944334653838
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.15483383685800603
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3213087248322148
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4355416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3692652925531915
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-7/b63d1462-f84b-4d20-86d6-1a54cf4eb81f.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-7/b63d1462-f84b-4d20-86d6-1a54cf4eb81f.json
deleted file mode 100644
index 5721cb645779158317d20848e58f1b231e896f75..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-7/b63d1462-f84b-4d20-86d6-1a54cf4eb81f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-7/1762652580.261788",
- "retrieved_timestamp": "1762652580.261791",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-7",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-7"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4623953332712758
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5860594415302606
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11404833836858005
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41191666666666665
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3715093085106383
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-8/f6dced28-f64c-4995-88b1-ac9a82903de2.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-8/f6dced28-f64c-4995-88b1-ac9a82903de2.json
deleted file mode 100644
index 47959dd0898e0aeb67b161f4731ea6024cf0b1e5..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-8/f6dced28-f64c-4995-88b1-ac9a82903de2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-8/1762652580.262149",
- "retrieved_timestamp": "1762652580.262152",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-8",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-8"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45967255770245175
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5899505025903907
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11782477341389729
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4265208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37200797872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-9/956d92e9-51fb-4770-8687-6003f9594345.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-9/956d92e9-51fb-4770-8687-6003f9594345.json
deleted file mode 100644
index 80a3dc66154df762edceb92294cae766178774e3..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_bh-9/956d92e9-51fb-4770-8687-6003f9594345.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_bh-9/1762652580.262652",
- "retrieved_timestamp": "1762652580.2626529",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/bh-9",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/bh-9"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4508548429278758
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5850048697918168
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11555891238670694
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30201342281879195
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4145833333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3702626329787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_dp-6-8b/5c61d4f5-25a0-4ffe-a9d2-2a33d8bbd717.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_dp-6-8b/5c61d4f5-25a0-4ffe-a9d2-2a33d8bbd717.json
deleted file mode 100644
index 56b878294b850868a4d973f6731ca45ff1792521..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_dp-6-8b/5c61d4f5-25a0-4ffe-a9d2-2a33d8bbd717.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_dp-6-8b/1762652580.263117",
- "retrieved_timestamp": "1762652580.2631192",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/dp-6-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/dp-6-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4805804155197099
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5299697041031141
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13293051359516617
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44338541666666664
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38971077127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_dp-7-8b/44d85302-1af8-48ef-aebe-a9512c5bc387.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_dp-7-8b/44d85302-1af8-48ef-aebe-a9512c5bc387.json
deleted file mode 100644
index f96126fb64d82faf067979c62217d49a7a64cf73..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_dp-7-8b/44d85302-1af8-48ef-aebe-a9512c5bc387.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_dp-7-8b/1762652580.2634509",
- "retrieved_timestamp": "1762652580.2634518",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/dp-7-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/dp-7-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44983089314130953
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5290850650389306
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12613293051359517
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44075
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3933676861702128
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_ek-6/a05ce252-928c-4482-95f7-f4c0fc2c7c10.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_ek-6/a05ce252-928c-4482-95f7-f4c0fc2c7c10.json
deleted file mode 100644
index 8d60b81d5ef07a2c708d62c9b2b755c67c982bcd..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_ek-6/a05ce252-928c-4482-95f7-f4c0fc2c7c10.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_ek-6/1762652580.2637498",
- "retrieved_timestamp": "1762652580.263751",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/ek-6",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/ek-6"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4642437621067656
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5219292795769993
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13217522658610273
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4143645833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3861369680851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_ek-7/23127691-ff90-433f-97d2-322e1191d821.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_ek-7/23127691-ff90-433f-97d2-322e1191d821.json
deleted file mode 100644
index d1547609fa4f8aa3c2d2a1ac1d61120b37914239..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_ek-7/23127691-ff90-433f-97d2-322e1191d821.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_ek-7/1762652580.264135",
- "retrieved_timestamp": "1762652580.2641358",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/ek-7",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/ek-7"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47670846686791046
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5194098090521417
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13293051359516617
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41706249999999995
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38871343085106386
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-1-8b/91d65b2a-a96a-467b-9e5c-9efa28d7fd96.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-1-8b/91d65b2a-a96a-467b-9e5c-9efa28d7fd96.json
deleted file mode 100644
index 13897e64eb7b41edc84e96752172f5a45e0b30d0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-1-8b/91d65b2a-a96a-467b-9e5c-9efa28d7fd96.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_f-1-8b/1762652580.264415",
- "retrieved_timestamp": "1762652580.264416",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/f-1-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/f-1-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49826571275327247
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5140825686172996
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1283987915407855
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45268749999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39070811170212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-2-8b/c63fc798-cf74-4767-ba95-6353b6761bcc.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-2-8b/c63fc798-cf74-4767-ba95-6353b6761bcc.json
deleted file mode 100644
index feb2695973a969a2b198032b0c915a81110f19a2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-2-8b/c63fc798-cf74-4767-ba95-6353b6761bcc.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_f-2-8b/1762652580.264705",
- "retrieved_timestamp": "1762652580.2647061",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/f-2-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/f-2-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48237897667078905
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5294150378468933
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11706948640483383
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4500520833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39619348404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-3-8b/5ba1e4d3-29d4-4337-bd10-9e1a5df29af4.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-3-8b/5ba1e4d3-29d4-4337-bd10-9e1a5df29af4.json
deleted file mode 100644
index 180b125318c0cbae6cbc4ba5fbb31579d37b3575..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-3-8b/5ba1e4d3-29d4-4337-bd10-9e1a5df29af4.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_f-3-8b/1762652580.264997",
- "retrieved_timestamp": "1762652580.264998",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/f-3-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/f-3-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4803055891700687
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5274906581043712
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1216012084592145
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44208333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39544547872340424
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-4-8b/a98ec95c-4af0-4b55-adbc-06e5ceecd00f.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-4-8b/a98ec95c-4af0-4b55-adbc-06e5ceecd00f.json
deleted file mode 100644
index 09b854811be943233548252242eec4df10a61f09..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-4-8b/a98ec95c-4af0-4b55-adbc-06e5ceecd00f.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_f-4-8b/1762652580.265391",
- "retrieved_timestamp": "1762652580.2653928",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/f-4-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/f-4-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4797060687863757
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5288622486396436
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1148036253776435
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.45141666666666663
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39561170212765956
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-5-8b/4dd614dc-b68b-456c-ac55-f2221a479caa.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-5-8b/4dd614dc-b68b-456c-ac55-f2221a479caa.json
deleted file mode 100644
index d4d7ddc10bac7880124fb337ceafe8d3856bcdd8..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-5-8b/4dd614dc-b68b-456c-ac55-f2221a479caa.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_f-5-8b/1762652580.265783",
- "retrieved_timestamp": "1762652580.265785",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/f-5-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/f-5-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5043606519590242
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5313273519630752
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12386706948640483
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3087248322147651
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4460520833333334
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39486369680851063
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-6-8b/2a71c7d7-8ae6-45e7-ab7f-54f7d31dd131.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-6-8b/2a71c7d7-8ae6-45e7-ab7f-54f7d31dd131.json
deleted file mode 100644
index ca5975e419df86fa73a32e6a1cad35a87d3353a6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-6-8b/2a71c7d7-8ae6-45e7-ab7f-54f7d31dd131.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_f-6-8b/1762652580.2661529",
- "retrieved_timestamp": "1762652580.266155",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/f-6-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/f-6-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.48460196722474147
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.524094753042471
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11933534743202417
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44735416666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3939494680851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-7-8b/e8c5d934-c9b6-460c-bd45-c4a3e2d26bed.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-7-8b/e8c5d934-c9b6-460c-bd45-c4a3e2d26bed.json
deleted file mode 100644
index 6e4770307095a1befcc71b110dbbe1d63c2ff6ed..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-7-8b/e8c5d934-c9b6-460c-bd45-c4a3e2d26bed.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_f-7-8b/1762652580.2664478",
- "retrieved_timestamp": "1762652580.266449",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/f-7-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/f-7-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4462337708391512
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5277022085059414
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12386706948640483
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4315104166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39361702127659576
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-8-8b/dad898e1-ee18-4864-b432-462d17ac8006.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-8-8b/dad898e1-ee18-4864-b432-462d17ac8006.json
deleted file mode 100644
index d49e6fbb5d28465258eff4743186166ae3454d0f..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-8-8b/dad898e1-ee18-4864-b432-462d17ac8006.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_f-8-8b/1762652580.266931",
- "retrieved_timestamp": "1762652580.266932",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/f-8-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/f-8-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4739358236146758
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5259311478463803
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12235649546827794
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30956375838926176
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43544791666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39403257978723405
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-9-8b/1373c279-13b7-46d3-94a4-7b47c9319f88.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-9-8b/1373c279-13b7-46d3-94a4-7b47c9319f88.json
deleted file mode 100644
index ec7913e9ed9065244fd6ef2099af95b56e585257..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_f-9-8b/1373c279-13b7-46d3-94a4-7b47c9319f88.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_f-9-8b/1762652580.267217",
- "retrieved_timestamp": "1762652580.2672179",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/f-9-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/f-9-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4601723427173233
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5291558412946383
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1299093655589124
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3062080536912752
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44608333333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3943650265957447
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fct-14-8b/22c3022f-d538-4a4d-8d4b-05e915506451.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fct-14-8b/22c3022f-d538-4a4d-8d4b-05e915506451.json
deleted file mode 100644
index c265b7c6ad960847bcbbf66b65a00a0d55221a18..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fct-14-8b/22c3022f-d538-4a4d-8d4b-05e915506451.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_fct-14-8b/1762652580.2674618",
- "retrieved_timestamp": "1762652580.267463",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/fct-14-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/fct-14-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4128612082607481
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5206018889288543
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12009063444108761
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3162751677852349
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4185520833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3875498670212766
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fct-9-8b/4d1ddf64-4626-4877-a0fa-84e06f6cf977.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fct-9-8b/4d1ddf64-4626-4877-a0fa-84e06f6cf977.json
deleted file mode 100644
index 93f909483c6b2b71edb496877c469fd491ceb6d2..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fct-9-8b/4d1ddf64-4626-4877-a0fa-84e06f6cf977.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_fct-9-8b/1762652580.267691",
- "retrieved_timestamp": "1762652580.267692",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/fct-9-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/fct-9-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4353925362482657
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.520510244410076
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11933534743202417
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42906249999999996
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.39320146276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fr-1-8b/2014c198-5e12-41ef-8f65-7321d0423573.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fr-1-8b/2014c198-5e12-41ef-8f65-7321d0423573.json
deleted file mode 100644
index 8677355713559d9050ae1bc38cfadde62be7ff79..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fr-1-8b/2014c198-5e12-41ef-8f65-7321d0423573.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_fr-1-8b/1762652580.267912",
- "retrieved_timestamp": "1762652580.2679129",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/fr-1-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/fr-1-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.421079402651631
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5142290494968609
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11178247734138973
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4276979166666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.36103723404255317
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fr-10-8b/725e5a72-548f-46d0-b268-12209e5cb085.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fr-10-8b/725e5a72-548f-46d0-b268-12209e5cb085.json
deleted file mode 100644
index 4b0629e107a5150f634b9d37c321487acc3a9be9..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fr-10-8b/725e5a72-548f-46d0-b268-12209e5cb085.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_fr-10-8b/1762652580.268136",
- "retrieved_timestamp": "1762652580.268136",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/fr-10-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/fr-10-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44018869931781013
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5206624978702634
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12235649546827794
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31711409395973156
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4118541666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3863031914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fr-3-8b/8bdd1aba-81e4-44d1-acfd-6efeaf391ac8.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fr-3-8b/8bdd1aba-81e4-44d1-acfd-6efeaf391ac8.json
deleted file mode 100644
index 36cd34ef2f1d283da58bc5d45a3cbf1e1d935251..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_fr-3-8b/8bdd1aba-81e4-44d1-acfd-6efeaf391ac8.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_fr-3-8b/1762652580.268359",
- "retrieved_timestamp": "1762652580.26836",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/fr-3-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/fr-3-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4325700253106203
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5255174690526301
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11329305135951662
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41982291666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3863031914893617
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_gamma-Kosmos-EVAA-8B/6e5584a8-5b8e-48ce-8b80-2d39a74a9b0d.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_gamma-Kosmos-EVAA-8B/6e5584a8-5b8e-48ce-8b80-2d39a74a9b0d.json
deleted file mode 100644
index b16c7d7baf8715ebaaa0d3f62cc82f05b371df25..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_gamma-Kosmos-EVAA-8B/6e5584a8-5b8e-48ce-8b80-2d39a74a9b0d.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_gamma-Kosmos-EVAA-8B/1762652580.268576",
- "retrieved_timestamp": "1762652580.268577",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/gamma-Kosmos-EVAA-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/gamma-Kosmos-EVAA-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42500121898784116
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5252624326543692
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08987915407854985
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.44115624999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37757646276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_gamma-Kosmos-EVAA-v2-8B/67f972e1-4ebd-4b78-b740-fdc03ac88aac.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_gamma-Kosmos-EVAA-v2-8B/67f972e1-4ebd-4b78-b740-fdc03ac88aac.json
deleted file mode 100644
index c9fbde590d4aed1b836ce41d85dc30667605982c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_gamma-Kosmos-EVAA-v2-8B/67f972e1-4ebd-4b78-b740-fdc03ac88aac.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_gamma-Kosmos-EVAA-v2-8B/1762652580.268805",
- "retrieved_timestamp": "1762652580.268806",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/gamma-Kosmos-EVAA-v2-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/gamma-Kosmos-EVAA-v2-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4232525255211727
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5262464083930688
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.10574018126888217
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.32046979865771813
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4343958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3755817819148936
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_gamma-Kosmos-EVAA-v3-8B/d461545f-ebcb-49e2-94ce-a6591e31a94a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_gamma-Kosmos-EVAA-v3-8B/d461545f-ebcb-49e2-94ce-a6591e31a94a.json
deleted file mode 100644
index 8162acbc7076b9331baee2f8abcccb6524083309..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_gamma-Kosmos-EVAA-v3-8B/d461545f-ebcb-49e2-94ce-a6591e31a94a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_gamma-Kosmos-EVAA-v3-8B/1762652580.269119",
- "retrieved_timestamp": "1762652580.26912",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/gamma-Kosmos-EVAA-v3-8B",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/gamma-Kosmos-EVAA-v3-8B"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.43326928106313467
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.527793553969925
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11102719033232629
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31291946308724833
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4263020833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3897938829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_knf-2-8b/267e641c-7fbd-40d3-a9b7-eb3621240b2a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_knf-2-8b/267e641c-7fbd-40d3-a9b7-eb3621240b2a.json
deleted file mode 100644
index d8f91ae50a45fefbc6e4418b59263772efd6584e..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_knf-2-8b/267e641c-7fbd-40d3-a9b7-eb3621240b2a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_knf-2-8b/1762652580.269415",
- "retrieved_timestamp": "1762652580.2694159",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/knf-2-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/knf-2-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42500121898784116
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5206718655559387
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12009063444108761
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3104026845637584
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4185208333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3874667553191489
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_knfp-2-8b/0bd6a333-afc0-43a4-9d14-fa44c2364184.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_knfp-2-8b/0bd6a333-afc0-43a4-9d14-fa44c2364184.json
deleted file mode 100644
index 618cb4a561b7dc1d9b2c7ea9e856767a932790e0..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_knfp-2-8b/0bd6a333-afc0-43a4-9d14-fa44c2364184.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_knfp-2-8b/1762652580.2696629",
- "retrieved_timestamp": "1762652580.269664",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/knfp-2-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/knfp-2-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5327120928026525
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5304878011708133
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.14274924471299094
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29278523489932884
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4184583333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.37258976063829785
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_knfp-3-8b/38a5c599-a098-42f4-a7cb-acee487e382a.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_knfp-3-8b/38a5c599-a098-42f4-a7cb-acee487e382a.json
deleted file mode 100644
index 2532402609f89e74e077ca1d380b2dc2edd2b078..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_knfp-3-8b/38a5c599-a098-42f4-a7cb-acee487e382a.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_knfp-3-8b/1762652580.2700531",
- "retrieved_timestamp": "1762652580.2700539",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/knfp-3-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/knfp-3-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49456885508229276
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5199790073136731
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12235649546827794
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3070469798657718
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.41712499999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3881316489361702
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-1-8b/cd7e14cb-b1f1-47d8-81a9-960da8ac4e05.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-1-8b/cd7e14cb-b1f1-47d8-81a9-960da8ac4e05.json
deleted file mode 100644
index 96bb72cd2f85c2a9c3917ff42f9ba1d0f16a86fe..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-1-8b/cd7e14cb-b1f1-47d8-81a9-960da8ac4e05.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-1-8b/1762652580.2702851",
- "retrieved_timestamp": "1762652580.270286",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/kstc-1-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/kstc-1-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4642936297911763
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5209048705325947
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.11706948640483383
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31711409395973156
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4157916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3892121010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-11-8b/41b46842-dffa-4791-8225-99d676f563c9.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-11-8b/41b46842-dffa-4791-8225-99d676f563c9.json
deleted file mode 100644
index 93334c07fbc5f8459a672995f6208f75db13f7ee..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-11-8b/41b46842-dffa-4791-8225-99d676f563c9.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-11-8b/1762652580.270522",
- "retrieved_timestamp": "1762652580.270522",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/kstc-11-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/kstc-11-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4757343847657549
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5189389675805397
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12009063444108761
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3028523489932886
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4117604166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3878823138297872
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-4-8b/6b63598f-4891-4b71-99ca-bc56b780d829.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-4-8b/6b63598f-4891-4b71-99ca-bc56b780d829.json
deleted file mode 100644
index 58bb003d416d3beb493bbf81e983215ac02a6218..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-4-8b/6b63598f-4891-4b71-99ca-bc56b780d829.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-4-8b/1762652580.270735",
- "retrieved_timestamp": "1762652580.270736",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/kstc-4-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/kstc-4-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4769832932175517
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5216059333020012
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12386706948640483
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4117916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3868849734042553
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-5-8b/ea79ca75-c55b-457a-b952-528a22567dbb.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-5-8b/ea79ca75-c55b-457a-b952-528a22567dbb.json
deleted file mode 100644
index d36f019366b8d344714e938f60f9f3503b4590f6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-5-8b/ea79ca75-c55b-457a-b952-528a22567dbb.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-5-8b/1762652580.270952",
- "retrieved_timestamp": "1762652580.270953",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/kstc-5-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/kstc-5-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.47208739477918593
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5211438914491455
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.1299093655589124
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.31543624161073824
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4223958333333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3892121010638298
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-6-8b/f7d63a4b-070d-4581-acce-cd356a3dea47.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-6-8b/f7d63a4b-070d-4581-acce-cd356a3dea47.json
deleted file mode 100644
index 19241db76f459c7b50e0daa1959900cf0a031b1c..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-6-8b/f7d63a4b-070d-4581-acce-cd356a3dea47.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-6-8b/1762652580.2711701",
- "retrieved_timestamp": "1762652580.2711701",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/kstc-6-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/kstc-6-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49439376410147295
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5230977287748603
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.12462235649546828
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.29949664429530204
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4104895833333333
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3857214095744681
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-8-8b/85502cb7-db11-43ce-a3cf-f9329ecec2e1.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-8-8b/85502cb7-db11-43ce-a3cf-f9329ecec2e1.json
deleted file mode 100644
index 76709f3fccff873a91c47fe091fd369ca9775443..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-8-8b/85502cb7-db11-43ce-a3cf-f9329ecec2e1.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-8-8b/1762652580.271383",
- "retrieved_timestamp": "1762652580.271384",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/kstc-8-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/kstc-8-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.49097173278013445
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5238910223750602
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13066465256797583
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3053691275167785
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.42112499999999997
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3888796542553192
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-9-8b/5f36e182-fa70-41d9-9cc6-12367035fc76.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-9-8b/5f36e182-fa70-41d9-9cc6-12367035fc76.json
deleted file mode 100644
index 9e3959758bffb49da2a9f6d10bc3990820648b6d..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_kstc-9-8b/5f36e182-fa70-41d9-9cc6-12367035fc76.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_kstc-9-8b/1762652580.27159",
- "retrieved_timestamp": "1762652580.27159",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/kstc-9-8b",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/kstc-9-8b"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4860758343417687
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5238366551736342
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.13595166163141995
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3011744966442953
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4117916666666666
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38721742021276595
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-10/79c255e5-8a6b-4afd-a03e-e35cbcbcc712.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-10/79c255e5-8a6b-4afd-a03e-e35cbcbcc712.json
deleted file mode 100644
index 4aca0bb19f1d1e4736dfbc6306ed6ad063ca0038..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-10/79c255e5-8a6b-4afd-a03e-e35cbcbcc712.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_slu-10/1762652580.271806",
- "retrieved_timestamp": "1762652580.271807",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/slu-10",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/slu-10"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4359920566319587
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5096469529197213
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09743202416918428
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.313758389261745
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3920104166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3663563829787234
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-11/0091eabc-3888-4e1a-a29d-8c4e98b599f2.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-11/0091eabc-3888-4e1a-a29d-8c4e98b599f2.json
deleted file mode 100644
index 683aa6b30b56b47f8d1a18a5f1d986f925168759..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-11/0091eabc-3888-4e1a-a29d-8c4e98b599f2.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_slu-11/1762652580.272018",
- "retrieved_timestamp": "1762652580.272018",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/slu-11",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/slu-11"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.372519359743259
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4890236865115587
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.055891238670694864
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3036912751677852
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3919479166666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.33818151595744683
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-13/1a1eaa84-9926-4c4b-b254-96cd667c25ac.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-13/1a1eaa84-9926-4c4b-b254-96cd667c25ac.json
deleted file mode 100644
index 895ad086ad49781aa843aec3ddc3b0f73a6de7e6..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-13/1a1eaa84-9926-4c4b-b254-96cd667c25ac.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_slu-13/1762652580.272234",
- "retrieved_timestamp": "1762652580.272237",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/slu-13",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/slu-13"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4378404854674486
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5097334543819346
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.08081570996978851
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.38140625
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.35804521276595747
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-14/59703023-61e1-4df0-8542-703d5a318756.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-14/59703023-61e1-4df0-8542-703d5a318756.json
deleted file mode 100644
index 335e9416b6f7a6a3bef8f82f4baddde2f6284719..0000000000000000000000000000000000000000
--- a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-14/59703023-61e1-4df0-8542-703d5a318756.json
+++ /dev/null
@@ -1,107 +0,0 @@
-{
- "schema_version": "0.0.1",
- "evaluation_id": "hfopenllm_v2/jaspionjader_slu-14/1762652580.27245",
- "retrieved_timestamp": "1762652580.2724512",
- "source_data": [
- "https://open-llm-leaderboard-open-llm-leaderboard.hf.space/api/leaderboard/formatted"
- ],
- "evaluation_source": {
- "evaluation_source_name": "HF Open LLM v2",
- "evaluation_source_type": "leaderboard"
- },
- "source_metadata": {
- "source_organization_name": "Hugging Face",
- "evaluator_relationship": "third_party"
- },
- "model_info": {
- "name": "jaspionjader/slu-14",
- "developer": "jaspionjader",
- "inference_platform": "unknown",
- "id": "jaspionjader/slu-14"
- },
- "evaluation_results": [
- {
- "evaluation_name": "IFEval",
- "metric_config": {
- "evaluation_description": "Accuracy on IFEval",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.4106880853912065
- }
- },
- {
- "evaluation_name": "BBH",
- "metric_config": {
- "evaluation_description": "Accuracy on BBH",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.5088505978489455
- }
- },
- {
- "evaluation_name": "MATH Level 5",
- "metric_config": {
- "evaluation_description": "Exact Match on MATH Level 5",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.09743202416918428
- }
- },
- {
- "evaluation_name": "GPQA",
- "metric_config": {
- "evaluation_description": "Accuracy on GPQA",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.30788590604026844
- }
- },
- {
- "evaluation_name": "MUSR",
- "metric_config": {
- "evaluation_description": "Accuracy on MUSR",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3960416666666667
- }
- },
- {
- "evaluation_name": "MMLU-PRO",
- "metric_config": {
- "evaluation_description": "Accuracy on MMLU-PRO",
- "lower_is_better": false,
- "score_type": "continuous",
- "min_score": 0,
- "max_score": 1
- },
- "score_details": {
- "score": 0.3626994680851064
- }
- }
- ],
- "additional_details": {
- "precision": "bfloat16",
- "architecture": "LlamaForCausalLM",
- "params_billions": 8.03
- }
-}
\ No newline at end of file
diff --git a/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspionjader_slu-17/fea528ae-4015-4adf-bce0-f9775554cc5f.json b/leaderboard_data/HFOpenLLMv2/jaspionjader/jaspion